From 18dbad5edb59ffec256d9f07233a54bc99ac81d4 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 15 Nov 2023 19:22:12 -0800 Subject: [PATCH] Use Sigmoid to normalize cross-encoder score between 0-1 - While sigmoid normalization isn't required for reranking, normalizing both encoder and cross-encoder scores to distance metrics makes them easier to reason about - Softmax wasn't required as we don't need probabilities; sigmoid is good enough to get a distance metric --- src/database/adapters/__init__.py | 4 ---- src/khoj/processor/embeddings.py | 5 +++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/database/adapters/__init__.py b/src/database/adapters/__init__.py index ea391105..951bf632 100644 --- a/src/database/adapters/__init__.py +++ b/src/database/adapters/__init__.py @@ -11,10 +11,6 @@ from pgvector.django import CosineDistance from django.db.models.manager import BaseManager from django.db.models import Q from torch import Tensor -from pgvector.django import CosineDistance -from django.db.models.manager import BaseManager -from django.db.models import Q -from torch import Tensor # Import sync_to_async from Django Channels from asgiref.sync import sync_to_async diff --git a/src/khoj/processor/embeddings.py b/src/khoj/processor/embeddings.py index 59a61d05..392d402f 100644 --- a/src/khoj/processor/embeddings.py +++ b/src/khoj/processor/embeddings.py @@ -1,6 +1,7 @@ from typing import List from sentence_transformers import SentenceTransformer, CrossEncoder +from torch import nn from khoj.utils.helpers import get_device from khoj.utils.rawconfig import SearchResponse @@ -26,6 +27,6 @@ class CrossEncoderModel: self.cross_encoder_model = CrossEncoder(model_name=self.model_name, device=get_device()) def predict(self, query, hits: List[SearchResponse], key: str = "compiled"): - cross__inp = [[query, hit.additional[key]] for hit in hits] - cross_scores = self.cross_encoder_model.predict(cross__inp, apply_softmax=True) + cross_inp = 
[[query, hit.additional[key]] for hit in hits] + cross_scores = self.cross_encoder_model.predict(cross_inp, activation_fct=nn.Sigmoid()) return cross_scores