From 2930b57c781a7947b30d71a2220907f84226d0ab Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 16 Jun 2024 16:22:16 +0530 Subject: [PATCH] Use hashed value to improve deduplication of search results on server --- src/khoj/search_type/text_search.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py index 27899bd3..f3ce7110 100644 --- a/src/khoj/search_type/text_search.py +++ b/src/khoj/search_type/text_search.py @@ -132,11 +132,13 @@ async def query( def collate_results(hits, dedupe=True): hit_ids = set() + hit_hashes = set() for hit in hits: - if dedupe and hit.corpus_id in hit_ids: + if dedupe and (hit.hashed_value in hit_hashes or hit.corpus_id in hit_ids): continue else: + hit_hashes.add(hit.hashed_value) hit_ids.add(hit.corpus_id) yield SearchResponse.model_validate( {