diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py
index 8cfd35a2..9d8d5c3a 100644
--- a/src/khoj/search_type/text_search.py
+++ b/src/khoj/search_type/text_search.py
@@ -73,6 +73,7 @@ def compute_embeddings(
         # Encode any new entries in the corpus and update corpus embeddings
         new_entries = [entry.compiled for id, entry in entries_with_ids if id == -1]
         if new_entries:
+            logger.info(f"📩 Indexing {len(new_entries)} text entries.")
             new_embeddings = bi_encoder.encode(
                 new_entries, convert_to_tensor=True, device=state.device, show_progress_bar=True
             )
@@ -87,6 +88,7 @@ def compute_embeddings(
     # Else compute the corpus embeddings from scratch
     else:
         new_entries = [entry.compiled for _, entry in entries_with_ids]
+        logger.info(f"📩 Indexing {len(new_entries)} text entries. Creating index from scratch.")
         corpus_embeddings = bi_encoder.encode(
             new_entries, convert_to_tensor=True, device=state.device, show_progress_bar=True
         )
diff --git a/tests/test_text_search.py b/tests/test_text_search.py
index a2c89f4e..830feb9b 100644
--- a/tests/test_text_search.py
+++ b/tests/test_text_search.py
@@ -1,4 +1,5 @@
 # System Packages
+import logging
 from pathlib import Path
 
 # External Packages
@@ -48,6 +49,26 @@ def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchCo
     assert len(notes_model.corpus_embeddings) == 10
 
 
+# ----------------------------------------------------------------------------------------------------
+def test_text_content_index_only_updates_on_changes(content_config: ContentConfig, search_config: SearchConfig, caplog):
+    # Arrange
+    caplog.set_level(logging.INFO, logger="khoj")
+
+    # Act
+    # Generate initial notes embeddings during asymmetric setup
+    text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
+    initial_logs = caplog.text
+    caplog.clear()  # Clear logs
+
+    # Run asymmetric setup again with no changes to data source. Ensure index is not updated
+    text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False)
+    final_logs = caplog.text
+
+    # Assert
+    assert "📩 Saved computed text embeddings to" in initial_logs
+    assert "📩 Saved computed text embeddings to" not in final_logs
+
+
 # ----------------------------------------------------------------------------------------------------
 def test_asymmetric_search(content_config: ContentConfig, search_config: SearchConfig):
     # Arrange