mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-05 21:29:11 +00:00
Deep copy entries and embeddings in filters. Defer the copy until actual filtering
- Only the filter knows when entries and embeddings are to be manipulated, so move the responsibility for deep copying them before manipulation to the filters.
- Create the deep copy in the filters. This avoids creating a deep copy of entries and embeddings when filter results are being loaded from cache, etc.
This commit is contained in:
@@ -3,7 +3,6 @@ import argparse
|
||||
import pathlib
|
||||
import logging
|
||||
import time
|
||||
from copy import deepcopy
|
||||
|
||||
# External Packages
|
||||
import torch
|
||||
@@ -77,22 +76,11 @@ def compute_embeddings(entries, bi_encoder, embeddings_file, regenerate=False):
|
||||
|
||||
def query(raw_query: str, model: TextSearchModel, rank_results=False):
|
||||
"Search for entries that answer the query"
|
||||
query = raw_query
|
||||
|
||||
# Use deep copy of original embeddings, entries to filter if query contains filters
|
||||
start = time.time()
|
||||
filters_in_query = [filter for filter in model.filters if filter.can_filter(query)]
|
||||
if filters_in_query:
|
||||
corpus_embeddings = deepcopy(model.corpus_embeddings)
|
||||
entries = deepcopy(model.entries)
|
||||
else:
|
||||
corpus_embeddings = model.corpus_embeddings
|
||||
entries = model.entries
|
||||
end = time.time()
|
||||
logger.debug(f"Copy Time: {end - start:.3f} seconds")
|
||||
query, entries, corpus_embeddings = raw_query, model.entries, model.corpus_embeddings
|
||||
|
||||
# Filter query, entries and embeddings before semantic search
|
||||
start = time.time()
|
||||
filters_in_query = [filter for filter in model.filters if filter.can_filter(query)]
|
||||
for filter in filters_in_query:
|
||||
query, entries, corpus_embeddings = filter.apply(query, entries, corpus_embeddings)
|
||||
end = time.time()
|
||||
|
||||
Reference in New Issue
Block a user