mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Test text search index only updates on changes to text content
This commit is contained in:
@@ -73,6 +73,7 @@ def compute_embeddings(
|
|||||||
# Encode any new entries in the corpus and update corpus embeddings
|
# Encode any new entries in the corpus and update corpus embeddings
|
||||||
new_entries = [entry.compiled for id, entry in entries_with_ids if id == -1]
|
new_entries = [entry.compiled for id, entry in entries_with_ids if id == -1]
|
||||||
if new_entries:
|
if new_entries:
|
||||||
|
logger.info(f"📩 Indexing {len(new_entries)} text entries.")
|
||||||
new_embeddings = bi_encoder.encode(
|
new_embeddings = bi_encoder.encode(
|
||||||
new_entries, convert_to_tensor=True, device=state.device, show_progress_bar=True
|
new_entries, convert_to_tensor=True, device=state.device, show_progress_bar=True
|
||||||
)
|
)
|
||||||
@@ -87,6 +88,7 @@ def compute_embeddings(
|
|||||||
# Else compute the corpus embeddings from scratch
|
# Else compute the corpus embeddings from scratch
|
||||||
else:
|
else:
|
||||||
new_entries = [entry.compiled for _, entry in entries_with_ids]
|
new_entries = [entry.compiled for _, entry in entries_with_ids]
|
||||||
|
logger.info(f"📩 Indexing {len(new_entries)} text entries. Creating index from scratch.")
|
||||||
corpus_embeddings = bi_encoder.encode(
|
corpus_embeddings = bi_encoder.encode(
|
||||||
new_entries, convert_to_tensor=True, device=state.device, show_progress_bar=True
|
new_entries, convert_to_tensor=True, device=state.device, show_progress_bar=True
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
# System Packages
|
# System Packages
|
||||||
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# External Packages
|
# External Packages
|
||||||
@@ -48,6 +49,26 @@ def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchCo
|
|||||||
assert len(notes_model.corpus_embeddings) == 10
|
assert len(notes_model.corpus_embeddings) == 10
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
def test_text_content_index_only_updates_on_changes(content_config: ContentConfig, search_config: SearchConfig, caplog):
    """Check that text embeddings are saved on a forced regenerate but not on an unchanged re-run.

    Runs text search setup twice over the same org content: first with
    ``regenerate=True``, then with ``regenerate=False``. The captured "khoj"
    logs of each run are inspected for the message logged when computed
    embeddings are saved.
    """
    # Arrange
    saved_marker = "📩 Saved computed text embeddings to"
    setup_args = (OrgToJsonl, content_config.org, search_config.asymmetric)
    caplog.set_level(logging.INFO, logger="khoj")

    # Act
    # First pass: build the notes embeddings with a forced regenerate
    text_search.setup(*setup_args, regenerate=True)
    logs_after_regenerate = caplog.text
    # Drop captured records so the second pass is inspected in isolation
    caplog.clear()

    # Second pass: same data source, no forced regenerate
    text_search.setup(*setup_args, regenerate=False)
    logs_after_rerun = caplog.text

    # Assert
    # Embeddings are saved on the first pass only
    assert saved_marker in logs_after_regenerate
    assert saved_marker not in logs_after_rerun
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_asymmetric_search(content_config: ContentConfig, search_config: SearchConfig):
|
def test_asymmetric_search(content_config: ContentConfig, search_config: SearchConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
|
|||||||
Reference in New Issue
Block a user