mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Use new Text Entry class to track text entries in Intermediate Format
- Context
- The app maintains all text content in a standard, intermediate format
- Initially, the intermediate format was loaded and passed around as a
dictionary to allow easier, faster updates to its schema
- The intermediate format is reasonably stable now, given its usage
by all 3 text content types currently implemented
- Changes
- Concretize text entries into `Entry' class instead of using dictionaries
- Code is updated to load, pass around entries as `Entry' objects
instead of as dictionaries
- `text_search' and `text_to_jsonl' methods are annotated with
type hints for the new `Entry' type
- Code and Tests referencing entries are updated to use class style
access patterns instead of the previous dictionary access patterns
- Move `mark_entries_for_update' method into `TextToJsonl' base class
- This is a more natural location for the method as it is only
(to be) used by `text_to_jsonl' classes
- Avoid circular reference issues on importing `Entry' class
This commit is contained in:
@@ -8,14 +8,15 @@ import torch
|
||||
|
||||
# Application Packages
|
||||
from src.search_filter.date_filter import DateFilter
|
||||
from src.utils.rawconfig import Entry
|
||||
|
||||
|
||||
def test_date_filter():
|
||||
embeddings = torch.randn(3, 10)
|
||||
entries = [
|
||||
{'compiled': '', 'raw': 'Entry with no date'},
|
||||
{'compiled': '', 'raw': 'April Fools entry: 1984-04-01'},
|
||||
{'compiled': '', 'raw': 'Entry with date:1984-04-02'}]
|
||||
Entry(compiled='', raw='Entry with no date'),
|
||||
Entry(compiled='', raw='April Fools entry: 1984-04-01'),
|
||||
Entry(compiled='', raw='Entry with date:1984-04-02')
|
||||
]
|
||||
|
||||
q_with_no_date_filter = 'head tail'
|
||||
ret_query, entry_indices = DateFilter().apply(q_with_no_date_filter, entries)
|
||||
|
||||
@@ -3,6 +3,7 @@ import torch
|
||||
|
||||
# Application Packages
|
||||
from src.search_filter.file_filter import FileFilter
|
||||
from src.utils.rawconfig import Entry
|
||||
|
||||
|
||||
def test_no_file_filter():
|
||||
@@ -104,9 +105,10 @@ def test_multiple_file_filter():
|
||||
def arrange_content():
|
||||
embeddings = torch.randn(4, 10)
|
||||
entries = [
|
||||
{'compiled': '', 'raw': 'First Entry', 'file': 'file 1.org'},
|
||||
{'compiled': '', 'raw': 'Second Entry', 'file': 'file2.org'},
|
||||
{'compiled': '', 'raw': 'Third Entry', 'file': 'file 1.org'},
|
||||
{'compiled': '', 'raw': 'Fourth Entry', 'file': 'file2.org'}]
|
||||
Entry(compiled='', raw='First Entry', file= 'file 1.org'),
|
||||
Entry(compiled='', raw='Second Entry', file= 'file2.org'),
|
||||
Entry(compiled='', raw='Third Entry', file= 'file 1.org'),
|
||||
Entry(compiled='', raw='Fourth Entry', file= 'file2.org')
|
||||
]
|
||||
|
||||
return embeddings, entries
|
||||
return entries
|
||||
|
||||
@@ -70,7 +70,7 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig
|
||||
image_files_url='/static/images',
|
||||
count=1)
|
||||
|
||||
actual_image_path = output_directory.joinpath(Path(results[0]["entry"]).name)
|
||||
actual_image_path = output_directory.joinpath(Path(results[0].entry).name)
|
||||
actual_image = Image.open(actual_image_path)
|
||||
expected_image = Image.open(content_config.image.input_directories[0].joinpath(expected_image_name))
|
||||
|
||||
|
||||
@@ -76,7 +76,7 @@ def test_asymmetric_search(content_config: ContentConfig, search_config: SearchC
|
||||
|
||||
# Assert
|
||||
# Actual_data should contain "Khoj via Emacs" entry
|
||||
search_result = results[0]["entry"]
|
||||
search_result = results[0].entry
|
||||
assert "git clone" in search_result
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Application Packages
|
||||
from src.search_filter.word_filter import WordFilter
|
||||
from src.utils.config import SearchType
|
||||
from src.utils.rawconfig import Entry
|
||||
|
||||
|
||||
def test_no_word_filter():
|
||||
@@ -69,9 +70,10 @@ def test_word_include_and_exclude_filter():
|
||||
|
||||
def arrange_content():
|
||||
entries = [
|
||||
{'compiled': '', 'raw': 'Minimal Entry'},
|
||||
{'compiled': '', 'raw': 'Entry with exclude_word'},
|
||||
{'compiled': '', 'raw': 'Entry with include_word'},
|
||||
{'compiled': '', 'raw': 'Entry with include_word and exclude_word'}]
|
||||
Entry(compiled='', raw='Minimal Entry'),
|
||||
Entry(compiled='', raw='Entry with exclude_word'),
|
||||
Entry(compiled='', raw='Entry with include_word'),
|
||||
Entry(compiled='', raw='Entry with include_word and exclude_word')
|
||||
]
|
||||
|
||||
return entries
|
||||
|
||||
Reference in New Issue
Block a user