Use Base TextToJsonl class to standardize <text>_to_jsonl processors

- Start standardizing implementation of the `text_to_jsonl' processors
  - `text_to_jsonl; scripts already had a shared structure
  - This change starts to codify that implicit structure

- Benefits
  - Ease adding more `text_to_jsonl; processors
  - Allow merging shared functionality
  - Help with type hinting

- Drawbacks
  - Lower agility to change. But this was already an implicit issue as
    the text_to_jsonl processors got more deeply wired into the app
This commit is contained in:
Debanjum Singh Solanky
2022-09-14 10:53:43 +03:00
parent c16ae9e344
commit 02d944030f
12 changed files with 364 additions and 345 deletions

View File

@@ -6,7 +6,7 @@ from src.search_type import image_search, text_search
from src.utils.config import SearchType
from src.utils.helpers import resolve_absolute_path
from src.utils.rawconfig import ContentConfig, TextContentConfig, ImageContentConfig, SearchConfig, TextSearchConfig, ImageSearchConfig
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
from src.processor.org_mode.org_to_jsonl import OrgToJsonl
from src.search_filter.date_filter import DateFilter
from src.search_filter.word_filter import WordFilter
from src.search_filter.file_filter import FileFilter
@@ -60,6 +60,6 @@ def content_config(tmp_path_factory, search_config: SearchConfig):
embeddings_file = content_dir.joinpath('note_embeddings.pt'))
filters = [DateFilter(), WordFilter(), FileFilter()]
text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
return content_config