mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-08 05:39:13 +00:00
Create test directory with model data to reuse for pytest session
- Use pytest fixture with session scope
- Instantiate test directory with model data to reuse for tests
This commit is contained in:
35
tests/conftest.py
Normal file
35
tests/conftest.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Standard Packages
|
||||||
|
import pytest
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Internal Packages
|
||||||
|
from src.utils.config import SearchConfig, TextSearchConfig, ImageSearchConfig
|
||||||
|
from src.search_type import asymmetric, image_search
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(scope='session')
def model_dir(tmp_path_factory):
    """Prepare a session-wide temporary directory holding search model data.

    Runs the image and notes search setup once per pytest session against the
    test data under ``tests/data``, with every output path placed inside a
    temporary directory created through ``tmp_path_factory``.

    Args:
        tmp_path_factory: pytest's session-scoped temporary directory factory.

    Returns:
        The temporary directory that the setup calls were pointed at.
    """
    data_dir = tmp_path_factory.mktemp('data')
    search_config = SearchConfig()

    # Generate Image Embeddings from Test Images
    image_config = ImageSearchConfig(
        input_directory = Path('tests/data'),
        embeddings_file = data_dir / '.image_embeddings.pt',
        batch_size = 10,
        use_xmp_metadata = False,
        verbose = 2)
    search_config.image = image_config
    image_search.setup(search_config.image, regenerate=False)

    # Generate Notes Embeddings from Test Notes
    note_files = [
        Path('tests/data/main_readme.org'),
        Path('tests/data/interface_emacs_readme.org'),
    ]
    notes_config = TextSearchConfig(
        input_files = note_files,
        input_filter = None,
        compressed_jsonl = data_dir / '.notes.jsonl.gz',
        embeddings_file = data_dir / '.note_embeddings.pt',
        verbose = 0)
    search_config.notes = notes_config
    asymmetric.setup(search_config.notes, regenerate=False)

    return data_dir
|
||||||
@@ -16,6 +16,25 @@ from src.utils.helpers import resolve_absolute_path
|
|||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
client = TestClient(app)
|
client = TestClient(app)
|
||||||
|
|
||||||
|
@pytest.fixture
def search_config(model_dir):
    """Build a SearchConfig whose model artifacts all live in ``model_dir``.

    Args:
        model_dir: Directory supplied by the ``model_dir`` fixture; used as the
            location of the notes JSONL, notes embeddings and image embeddings.

    Returns:
        A ``SearchConfig`` with its ``notes`` and ``image`` sections populated
        for the test data under ``tests/data``.
    """
    config = SearchConfig()

    config.notes = TextSearchConfig(
        input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')],
        input_filter = None,
        compressed_jsonl = model_dir.joinpath('.notes.jsonl.gz'),
        embeddings_file = model_dir.joinpath('.note_embeddings.pt'),
        verbose = 0)

    config.image = ImageSearchConfig(
        input_directory = Path('tests/data'),
        # Keep image embeddings in model_dir like the notes artifacts, so tests
        # reuse the session's embeddings instead of writing into tests/data.
        embeddings_file = model_dir.joinpath('.image_embeddings.pt'),
        batch_size = 10,
        use_xmp_metadata = False,
        verbose = 2)

    return config
|
||||||
|
|
||||||
|
|
||||||
# Test
|
# Test
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
@@ -60,26 +79,9 @@ def test_regenerate_with_valid_search_type():
|
|||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_notes_search():
|
def test_notes_search(search_config):
|
||||||
# Arrange
|
# Arrange
|
||||||
search_config = SearchConfig()
|
model.notes_search = asymmetric.setup(search_config.notes, regenerate=False)
|
||||||
search_config.notes = TextSearchConfig(
|
|
||||||
input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')],
|
|
||||||
input_filter = None,
|
|
||||||
compressed_jsonl = Path('tests/data/.test.jsonl.gz'),
|
|
||||||
embeddings_file = Path('tests/data/.test_embeddings.pt'),
|
|
||||||
verbose = 0)
|
|
||||||
|
|
||||||
# Act
|
|
||||||
# Regenerate embeddings during asymmetric setup
|
|
||||||
notes_model = asymmetric.setup(search_config.notes, regenerate=True)
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
assert len(notes_model.entries) == 10
|
|
||||||
assert len(notes_model.corpus_embeddings) == 10
|
|
||||||
|
|
||||||
# Arrange
|
|
||||||
model.notes_search = notes_model
|
|
||||||
user_query = "How to git install application?"
|
user_query = "How to git install application?"
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
@@ -93,26 +95,9 @@ def test_notes_search():
|
|||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_notes_search_with_include_filter():
|
def test_notes_search_with_include_filter(search_config):
|
||||||
# Arrange
|
# Arrange
|
||||||
search_config = SearchConfig()
|
model.notes_search = asymmetric.setup(search_config.notes, regenerate=False)
|
||||||
search_config.notes = TextSearchConfig(
|
|
||||||
input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')],
|
|
||||||
input_filter = None,
|
|
||||||
compressed_jsonl = Path('tests/data/.test.jsonl.gz'),
|
|
||||||
embeddings_file = Path('tests/data/.test_embeddings.pt'),
|
|
||||||
verbose = 0)
|
|
||||||
|
|
||||||
# Act
|
|
||||||
# Regenerate embeddings during asymmetric setup
|
|
||||||
notes_model = asymmetric.setup(search_config.notes, regenerate=True)
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
assert len(notes_model.entries) == 10
|
|
||||||
assert len(notes_model.corpus_embeddings) == 10
|
|
||||||
|
|
||||||
# Arrange
|
|
||||||
model.notes_search = notes_model
|
|
||||||
user_query = "How to git install application? +Emacs"
|
user_query = "How to git install application? +Emacs"
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
@@ -120,32 +105,15 @@ def test_notes_search_with_include_filter():
|
|||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
# assert actual_data does not contains explicitly excluded word "Emacs"
|
# assert actual_data contains explicitly included word "Emacs"
|
||||||
search_result = response.json()[0]["Entry"]
|
search_result = response.json()[0]["Entry"]
|
||||||
assert "Emacs" in search_result
|
assert "Emacs" in search_result
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_notes_search_with_exclude_filter():
|
def test_notes_search_with_exclude_filter(search_config):
|
||||||
# Arrange
|
# Arrange
|
||||||
search_config = SearchConfig()
|
model.notes_search = asymmetric.setup(search_config.notes, regenerate=False)
|
||||||
search_config.notes = TextSearchConfig(
|
|
||||||
input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')],
|
|
||||||
input_filter = None,
|
|
||||||
compressed_jsonl = Path('tests/data/.test.jsonl.gz'),
|
|
||||||
embeddings_file = Path('tests/data/.test_embeddings.pt'),
|
|
||||||
verbose = 0)
|
|
||||||
|
|
||||||
# Act
|
|
||||||
# Regenerate embeddings during asymmetric setup
|
|
||||||
notes_model = asymmetric.setup(search_config.notes, regenerate=True)
|
|
||||||
|
|
||||||
# Assert
|
|
||||||
assert len(notes_model.entries) == 10
|
|
||||||
assert len(notes_model.corpus_embeddings) == 10
|
|
||||||
|
|
||||||
# Arrange
|
|
||||||
model.notes_search = notes_model
|
|
||||||
user_query = "How to git install application? -clone"
|
user_query = "How to git install application? -clone"
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
@@ -159,28 +127,15 @@ def test_notes_search_with_exclude_filter():
|
|||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_image_search():
|
def test_image_search(search_config):
|
||||||
# Arrange
|
# Arrange
|
||||||
search_config = SearchConfig()
|
model.image_search = image_search.setup(search_config.image, regenerate=False)
|
||||||
search_config.image = ImageSearchConfig(
|
query_expected_image_pairs = [("kitten in a park", "kitten_park.jpg"),
|
||||||
input_directory = Path('tests/data'),
|
("horse and dog in a farm", "horse_dog.jpg"),
|
||||||
embeddings_file = Path('tests/data/.image_embeddings.pt'),
|
("A guinea pig eating grass", "guineapig_grass.jpg")]
|
||||||
batch_size = 10,
|
|
||||||
use_xmp_metadata = False,
|
|
||||||
verbose = 2)
|
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
model.image_search = image_search.setup(search_config.image, regenerate=True)
|
for query, expected_image_name in query_expected_image_pairs:
|
||||||
|
|
||||||
# Assert
|
|
||||||
assert len(model.image_search.image_names) == 3
|
|
||||||
assert len(model.image_search.image_embeddings) == 3
|
|
||||||
|
|
||||||
# Arrange
|
|
||||||
for query, expected_image_name in [("kitten in a park", "kitten_park.jpg"),
|
|
||||||
("horse and dog in a farm", "horse_dog.jpg"),
|
|
||||||
("A guinea pig eating grass", "guineapig_grass.jpg")]:
|
|
||||||
# Act
|
|
||||||
hits = image_search.query(
|
hits = image_search.query(
|
||||||
query,
|
query,
|
||||||
count = 1,
|
count = 1,
|
||||||
@@ -200,29 +155,22 @@ def test_image_search():
|
|||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_notes_regenerate():
|
def test_asymmetric_setup(search_config):
|
||||||
# Arrange
|
|
||||||
search_config = SearchConfig()
|
|
||||||
search_config.notes = TextSearchConfig(
|
|
||||||
input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')],
|
|
||||||
input_filter = None,
|
|
||||||
compressed_jsonl = Path('tests/data/.test.jsonl.gz'),
|
|
||||||
embeddings_file = Path('tests/data/.test_embeddings.pt'),
|
|
||||||
verbose = 0)
|
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
# Regenerate embeddings during asymmetric setup
|
# Regenerate notes embeddings during asymmetric setup
|
||||||
notes_model = asymmetric.setup(search_config.notes, regenerate=True)
|
notes_model = asymmetric.setup(search_config.notes, regenerate=True)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert len(notes_model.entries) == 10
|
assert len(notes_model.entries) == 10
|
||||||
assert len(notes_model.corpus_embeddings) == 10
|
assert len(notes_model.corpus_embeddings) == 10
|
||||||
|
|
||||||
# Arrange
|
|
||||||
model.notes_search = notes_model
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
def test_image_search_setup(search_config):
|
||||||
# Act
|
# Act
|
||||||
response = client.get(f"/regenerate?t=notes")
|
# Regenerate image search embeddings during image setup
|
||||||
|
image_search_model = image_search.setup(search_config.image, regenerate=True)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert response.status_code == 200
|
assert len(image_search_model.image_names) == 3
|
||||||
|
assert len(image_search_model.image_embeddings) == 3
|
||||||
|
|||||||
Reference in New Issue
Block a user