mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 21:29:13 +00:00
Use hashed value to improve deduplication of search results on server
This commit is contained in:
@@ -132,11 +132,13 @@ async def query(
 
 def collate_results(hits, dedupe=True):
     hit_ids = set()
+    hit_hashes = set()
     for hit in hits:
-        if dedupe and hit.corpus_id in hit_ids:
+        if dedupe and (hit.hashed_value in hit_hashes or hit.corpus_id in hit_ids):
             continue
 
         else:
+            hit_hashes.add(hit.hashed_value)
             hit_ids.add(hit.corpus_id)
             yield SearchResponse.model_validate(
                 {
Reference in New Issue
Block a user