Merge Symmetric, Asymmetric Search Types into a single Text Search Type

- The code for both text search types was mostly the same.
  It was originally kept separate for expedience while experimenting
- The minor differences were reconciled and merged into a single
  text_search type
- This simplifies the app and makes it easier to process other
  text types
This commit is contained in:
Debanjum Singh Solanky
2022-07-21 18:05:43 +04:00
parent 0917f1574d
commit 0602d018c0
9 changed files with 52 additions and 324 deletions

View File

@@ -3,8 +3,9 @@ import pytest
import torch
# Internal Packages
from src.search_type import asymmetric, image_search
from src.utils.rawconfig import ContentConfig, TextContentConfig, ImageContentConfig, SearchConfig, SymmetricSearchConfig, AsymmetricSearchConfig, ImageSearchConfig
from src.search_type import image_search, text_search
from src.utils.rawconfig import ContentConfig, TextContentConfig, ImageContentConfig, SearchConfig, TextSearchConfig, ImageSearchConfig
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
@pytest.fixture(scope='session')
@@ -13,13 +14,13 @@ def search_config(tmp_path_factory):
search_config = SearchConfig()
search_config.asymmetric = SymmetricSearchConfig(
search_config.symmetric = TextSearchConfig(
encoder = "sentence-transformers/all-MiniLM-L6-v2",
cross_encoder = "cross-encoder/ms-marco-MiniLM-L-6-v2",
model_directory = model_dir
)
search_config.asymmetric = AsymmetricSearchConfig(
search_config.asymmetric = TextSearchConfig(
encoder = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
cross_encoder = "cross-encoder/ms-marco-MiniLM-L-6-v2",
model_directory = model_dir
@@ -55,7 +56,7 @@ def model_dir(search_config):
compressed_jsonl = model_dir.joinpath('notes.jsonl.gz'),
embeddings_file = model_dir.joinpath('note_embeddings.pt'))
asymmetric.setup(content_config.org, search_config.asymmetric, regenerate=False, device=device, verbose=True)
text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, device=device, verbose=True)
return model_dir

View File

@@ -3,8 +3,9 @@ from pathlib import Path
# Internal Packages
from src.main import model
from src.search_type import asymmetric
from src.search_type import text_search
from src.utils.rawconfig import ContentConfig, SearchConfig
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
# Test
@@ -12,7 +13,7 @@ from src.utils.rawconfig import ContentConfig, SearchConfig
def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchConfig):
# Act
# Regenerate notes embeddings during asymmetric setup
notes_model = asymmetric.setup(content_config.org, search_config.asymmetric, regenerate=True)
notes_model = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=True)
# Assert
assert len(notes_model.entries) == 10
@@ -22,15 +23,15 @@ def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchCo
# ----------------------------------------------------------------------------------------------------
def test_asymmetric_search(content_config: ContentConfig, search_config: SearchConfig):
# Arrange
model.notes_search = asymmetric.setup(content_config.org, search_config.asymmetric, regenerate=False)
model.notes_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
query = "How to git install application?"
# Act
hits, entries = asymmetric.query(
hits, entries = text_search.query(
query,
model = model.notes_search)
results = asymmetric.collate_results(
results = text_search.collate_results(
hits,
entries,
count=1)
@@ -44,7 +45,7 @@ def test_asymmetric_search(content_config: ContentConfig, search_config: SearchC
# ----------------------------------------------------------------------------------------------------
def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchConfig):
# Arrange
initial_notes_model= asymmetric.setup(content_config.org, search_config.asymmetric, regenerate=False)
initial_notes_model= text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
assert len(initial_notes_model.entries) == 10
assert len(initial_notes_model.corpus_embeddings) == 10
@@ -57,11 +58,11 @@ def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchC
f.write("\n* A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n")
# regenerate notes jsonl, model embeddings and model to include entry from new file
regenerated_notes_model = asymmetric.setup(content_config.org, search_config.asymmetric, regenerate=True)
regenerated_notes_model = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=True)
# Act
# reload embeddings, entries, notes model from previously generated notes jsonl and model embeddings files
initial_notes_model = asymmetric.setup(content_config.org, search_config.asymmetric, regenerate=False)
initial_notes_model = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
# Assert
assert len(regenerated_notes_model.entries) == 11

View File

@@ -8,9 +8,9 @@ import pytest
# Internal Packages
from src.main import app, model, config
from src.search_type import asymmetric, image_search
from src.utils.helpers import resolve_absolute_path
from src.search_type import text_search, image_search
from src.utils.rawconfig import ContentConfig, SearchConfig
from src.processor.org_mode import org_to_jsonl
# Arrange
@@ -115,7 +115,7 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig
# ----------------------------------------------------------------------------------------------------
def test_notes_search(content_config: ContentConfig, search_config: SearchConfig):
# Arrange
model.notes_search = asymmetric.setup(content_config.org, search_config.asymmetric, regenerate=False)
model.notes_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
user_query = "How to git install application?"
# Act
@@ -131,7 +131,7 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig
# ----------------------------------------------------------------------------------------------------
def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig):
# Arrange
model.notes_search = asymmetric.setup(content_config.org, search_config.asymmetric, regenerate=False)
model.notes_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
user_query = "How to git install application? +Emacs"
# Act
@@ -147,7 +147,7 @@ def test_notes_search_with_include_filter(content_config: ContentConfig, search_
# ----------------------------------------------------------------------------------------------------
def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_config: SearchConfig):
# Arrange
model.notes_search = asymmetric.setup(content_config.org, search_config.asymmetric, regenerate=False)
model.notes_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
user_query = "How to git install application? -clone"
# Act