Make filters to apply before semantic search configurable

Details
--
- The filters to apply are configured for each type in the search controller
- Multiple filters can be applied to the query, entries, etc. before search
- The asymmetric query method now just applies the passed filters to the
  query, entries and embeddings before semantic search is performed

Reason
--
This abstraction will simplify adding other pre-search filters, e.g. a datetime filter.
This commit is contained in:
Debanjum Singh Solanky
2022-07-13 16:29:23 +04:00
parent c92789d20a
commit b82aef26bf
2 changed files with 7 additions and 5 deletions

View File

@@ -94,7 +94,7 @@ def compute_embeddings(entries, bi_encoder, embeddings_file, regenerate=False, d
return corpus_embeddings
def query(raw_query: str, model: TextSearchModel, device=torch.device('cpu')):
def query(raw_query: str, model: TextSearchModel, device=torch.device('cpu'), filters: list = []):
"Search all notes for entries that answer the query"
# Copy original embeddings, entries to filter them for query
@@ -102,8 +102,9 @@ def query(raw_query: str, model: TextSearchModel, device=torch.device('cpu')):
corpus_embeddings = deepcopy(model.corpus_embeddings)
entries = deepcopy(model.entries)
# Filter to entries that contain all required_words and no blocked_words
query, entries, corpus_embeddings = explicit_filter(query, entries, corpus_embeddings)
# Filter query, entries and embeddings before semantic search
for filter in filters:
query, entries, corpus_embeddings = filter(query, entries, corpus_embeddings)
if entries is None or len(entries) == 0:
return {}