Add file of each entry to entry dict in org_to_jsonl converter

- This will help filter queries to the org content type using the file filter
- Do not explicitly specify the items being extracted from the JSON of
  each entry in text_search, as not all text search content types have
  file set in their jsonl converters
This commit is contained in:
Debanjum Singh Solanky
2022-09-05 01:57:17 +03:00
parent 7e083d3e96
commit 7606724dbc
3 changed files with 16 additions and 16 deletions

View File

@@ -52,9 +52,7 @@ def initialize_model(search_config: TextSearchConfig):
def extract_entries(jsonl_file):
"Load entries from compressed jsonl"
return [{'compiled': f'{entry["compiled"]}', 'raw': f'{entry["raw"]}'}
for entry
in load_jsonl(jsonl_file)]
return load_jsonl(jsonl_file)
def compute_embeddings(entries, bi_encoder, embeddings_file, regenerate=False):
@@ -83,7 +81,7 @@ def query(raw_query: str, model: TextSearchModel, rank_results=False):
for filter in filters_in_query:
query, entries, corpus_embeddings = filter.apply(query, entries, corpus_embeddings)
end = time.time()
logger.debug(f"Filter Time: {end - start:.3f} seconds")
logger.debug(f"Total Filter Time: {end - start:.3f} seconds")
if entries is None or len(entries) == 0:
return [], []