From d40076fcd6abdf27e058f374fe0aa3301d94848a Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 9 Jan 2023 16:17:36 -0300 Subject: [PATCH] Deduplicate test code, make teardown more robust using pytest fixtures --- tests/conftest.py | 23 +++++++++++ tests/test_text_search.py | 80 +++++++++++++-------------------------- 2 files changed, 49 insertions(+), 54 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index ec87f964..538e9206 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,6 @@ # External Packages +from copy import deepcopy +from pathlib import Path import pytest # Internal Packages @@ -62,3 +64,24 @@ def content_config(tmp_path_factory, search_config: SearchConfig): text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters) return content_config + + +@pytest.fixture(scope='function') +def new_org_file(content_config: ContentConfig): + # Setup + new_org_file = Path(content_config.org.input_filter[0]).parent / "new_file.org" + new_org_file.touch() + + yield new_org_file + + # Cleanup + if new_org_file.exists(): + new_org_file.unlink() + + +@pytest.fixture(scope='function') +def org_config_with_only_new_file(content_config: ContentConfig, new_org_file: Path): + new_org_config = deepcopy(content_config.org) + new_org_config.input_files = [f'{new_org_file}'] + new_org_config.input_filter = None + return new_org_config \ No newline at end of file diff --git a/tests/test_text_search.py b/tests/test_text_search.py index dcacf7fb..8b162874 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -8,42 +8,30 @@ import pytest # Internal Packages from src.utils.state import model from src.search_type import text_search -from src.utils.rawconfig import ContentConfig, SearchConfig +from src.utils.rawconfig import ContentConfig, SearchConfig, TextContentConfig from src.processor.org_mode.org_to_jsonl import OrgToJsonl # Test # ---------------------------------------------------------------------------------------------------- -def test_asymmetric_setup_with_missing_file_raises_error(content_config: ContentConfig, search_config: SearchConfig): +def test_asymmetric_setup_with_missing_file_raises_error(org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig): # Arrange - file_to_index = Path(content_config.org.input_filter[0]).parent / "new_file_to_index.org" - new_org_content_config = deepcopy(content_config.org) - new_org_content_config.input_files = [f'{file_to_index}'] - new_org_content_config.input_filter = None + # Ensure file mentioned in org.input-files is missing + single_new_file = Path(org_config_with_only_new_file.input_files[0]) + single_new_file.unlink() # Act # Generate notes embeddings during asymmetric setup with pytest.raises(FileNotFoundError): - text_search.setup(OrgToJsonl, new_org_content_config, search_config.asymmetric, regenerate=True) + text_search.setup(OrgToJsonl, org_config_with_only_new_file, search_config.asymmetric, regenerate=True) # ---------------------------------------------------------------------------------------------------- -def test_asymmetric_setup_with_empty_file_raises_error(content_config: ContentConfig, search_config: SearchConfig): - # Arrange - file_to_index = Path(content_config.org.input_filter[0]).parent / "new_file_to_index.org" - file_to_index.touch() - new_org_content_config = deepcopy(content_config.org) - new_org_content_config.input_files = [f'{file_to_index}'] - new_org_content_config.input_filter = None - +def test_asymmetric_setup_with_empty_file_raises_error(org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig): # Act # Generate notes embeddings during asymmetric setup with pytest.raises(ValueError, match=r'^No valid entries found*'): - text_search.setup(OrgToJsonl, new_org_content_config, search_config.asymmetric, regenerate=True) - - # Cleanup - # delete created test file - file_to_index.unlink() + text_search.setup(OrgToJsonl, org_config_with_only_new_file, search_config.asymmetric, regenerate=True) # ---------------------------------------------------------------------------------------------------- @@ -81,51 +69,37 @@ def test_asymmetric_search(content_config: ContentConfig, search_config: SearchC # ---------------------------------------------------------------------------------------------------- -def test_entry_chunking_by_max_tokens(content_config: ContentConfig, search_config: SearchConfig): +def test_entry_chunking_by_max_tokens(org_config_with_only_new_file: TextContentConfig, search_config: SearchConfig): # Arrange - initial_notes_model= text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True) - - assert len(initial_notes_model.entries) == 10 - assert len(initial_notes_model.corpus_embeddings) == 10 - - file_to_add_on_reload = Path(content_config.org.input_filter[0]).parent / "entry_exceeding_max_tokens.org" - content_config.org.input_files = [f'{file_to_add_on_reload}'] - # Insert org-mode entry with size exceeding max token limit to new org file max_tokens = 256 - with open(file_to_add_on_reload, "w") as f: + new_file_to_index = Path(org_config_with_only_new_file.input_files[0]) + with open(new_file_to_index, "w") as f: f.write(f"* Entry more than {max_tokens} words\n") for index in range(max_tokens+1): f.write(f"{index} ") # Act # reload embeddings, entries, notes model after adding new org-mode file - initial_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False) + initial_notes_model = text_search.setup(OrgToJsonl, org_config_with_only_new_file, search_config.asymmetric, regenerate=False) # Assert # verify newly added org-mode entry is split by max tokens - assert len(initial_notes_model.entries) == 12 - assert len(initial_notes_model.corpus_embeddings) == 12 - - # Cleanup - # delete reload test file added - content_config.org.input_files = [] - file_to_add_on_reload.unlink() + assert len(initial_notes_model.entries) == 2 + assert len(initial_notes_model.corpus_embeddings) == 2 # ---------------------------------------------------------------------------------------------------- -def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchConfig): +def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchConfig, new_org_file: Path): # Arrange initial_notes_model= text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True) assert len(initial_notes_model.entries) == 10 assert len(initial_notes_model.corpus_embeddings) == 10 - file_to_add_on_reload = Path(content_config.org.input_filter[0]).parent / "reload.org" - content_config.org.input_files = [f'{file_to_add_on_reload}'] - - # append Org-Mode Entry to first Org Input File in Config - with open(file_to_add_on_reload, "w") as f: + # append org-mode entry to first org input file in config + content_config.org.input_files = [f'{new_org_file}'] + with open(new_org_file, "w") as f: f.write("\n* A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n") # regenerate notes jsonl, model embeddings and model to include entry from new file @@ -139,40 +113,38 @@ def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchC assert len(regenerated_notes_model.entries) == 11 assert len(regenerated_notes_model.corpus_embeddings) == 11 + # Assert # verify new entry loaded from updated embeddings, entries assert len(initial_notes_model.entries) == 11 assert len(initial_notes_model.corpus_embeddings) == 11 # Cleanup - # delete reload test file added + # reset input_files in config to empty list content_config.org.input_files = [] - file_to_add_on_reload.unlink() # ---------------------------------------------------------------------------------------------------- -def test_incremental_update(content_config: ContentConfig, search_config: SearchConfig): +def test_incremental_update(content_config: ContentConfig, search_config: SearchConfig, new_org_file: Path): # Arrange initial_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True) assert len(initial_notes_model.entries) == 10 assert len(initial_notes_model.corpus_embeddings) == 10 - file_to_add_on_update = Path(content_config.org.input_filter[0]).parent / "update.org" - content_config.org.input_files = [f'{file_to_add_on_update}'] - - # append Org-Mode Entry to first Org Input File in Config - with open(file_to_add_on_update, "w") as f: + # append org-mode entry to first org input file in config + with open(new_org_file, "w") as f: f.write("\n* A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n") # Act # update embeddings, entries with the newly added note + content_config.org.input_files = [f'{new_org_file}'] initial_notes_model = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False) + # Assert # verify new entry added in updated embeddings, entries assert len(initial_notes_model.entries) == 11 assert len(initial_notes_model.corpus_embeddings) == 11 # Cleanup - # delete file added for update testing + # reset input_files in config to empty list content_config.org.input_files = [] - file_to_add_on_update.unlink()