From 9c321ac070bb52d452d3a87ee12f7e446a1e89f0 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sat, 11 Nov 2023 14:28:41 -0800 Subject: [PATCH] Fix cross encoder to apply softmax so its scores can be converted to a distance metric --- src/khoj/processor/embeddings.py | 2 +- src/khoj/search_type/text_search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/khoj/processor/embeddings.py b/src/khoj/processor/embeddings.py index 1e92f27d..a4daa24f 100644 --- a/src/khoj/processor/embeddings.py +++ b/src/khoj/processor/embeddings.py @@ -27,5 +27,5 @@ class CrossEncoderModel: def predict(self, query, hits: List[SearchResponse]): cross__inp = [[query, hit.additional["compiled"]] for hit in hits] - cross_scores = self.cross_encoder_model.predict(cross__inp) + cross_scores = self.cross_encoder_model.predict(cross__inp, apply_softmax=True) return cross_scores diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py index 041c385f..d6f27cea 100644 --- a/src/khoj/search_type/text_search.py +++ b/src/khoj/search_type/text_search.py @@ -224,7 +224,7 @@ def cross_encoder_score(query: str, hits: List[SearchResponse]) -> List[SearchRe # Convert cross-encoder scores to distances and pass in hits for reranking for idx in range(len(cross_scores)): - hits[idx]["cross_score"] = -1 * cross_scores[idx] + hits[idx]["cross_score"] = 1 - cross_scores[idx] return hits