mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 21:29:11 +00:00
Use each hit's hashed value, in addition to its corpus id, to improve deduplication of search results on the server
This commit is contained in:
@@ -132,11 +132,13 @@ async def query(
|
|||||||
|
|
||||||
def collate_results(hits, dedupe=True):
|
def collate_results(hits, dedupe=True):
|
||||||
hit_ids = set()
|
hit_ids = set()
|
||||||
|
hit_hashes = set()
|
||||||
for hit in hits:
|
for hit in hits:
|
||||||
if dedupe and hit.corpus_id in hit_ids:
|
if dedupe and (hit.hashed_value in hit_hashes or hit.corpus_id in hit_ids):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
hit_hashes.add(hit.hashed_value)
|
||||||
hit_ids.add(hit.corpus_id)
|
hit_ids.add(hit.corpus_id)
|
||||||
yield SearchResponse.model_validate(
|
yield SearchResponse.model_validate(
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user