Ensure top_k is never more than the total number of entries to search from

This commit is contained in:
Debanjum Singh Solanky
2021-11-17 00:56:31 +05:30
parent 4d37ace3d6
commit 8cf2465e8e

View File

@@ -161,6 +161,7 @@ def setup(config: TextSearchConfig, regenerate: bool) -> TextSearchModel:
# Extract Entries
entries = extract_entries(config.compressed_jsonl, config.verbose)
top_k = min(len(entries), top_k) # top_k hits can't be more than the total entries in corpus
# Compute or Load Embeddings
corpus_embeddings = compute_embeddings(entries, bi_encoder, config.embeddings_file, regenerate=regenerate, verbose=config.verbose)