Rename files and classes from X_To_JSONL to the more appropriate X_To_Entries

These content processors now convert content into entries in the DB instead of entries in a JSONL file
2026-03-02 21:19:12 +00:00 · 2023-11-01 14:51:33 -07:00
parent 2ad2055bcb
commit d92a2d03a7
16 changed files with 127 additions and 125 deletions
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -13,7 +13,7 @@ app = FastAPI()

 # Internal Packages
 from khoj.configure import configure_routes, configure_search_types, configure_middleware
-from khoj.processor.plaintext.plaintext_to_jsonl import PlaintextToJsonl
+from khoj.processor.plaintext.plaintext_to_entries import PlaintextToEntries
 from khoj.search_type import image_search, text_search
 from khoj.utils.config import SearchModels
 from khoj.utils.constants import web_directory
@@ -26,7 +26,7 @@ from khoj.utils.rawconfig import (
 )
 from khoj.utils import state, fs_syncer
 from khoj.routers.indexer import configure_content
-from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
+from khoj.processor.org_mode.org_to_entries import OrgToEntries
 from database.models import (
    KhojApiUser,
    LocalOrgConfig,
@@ -134,7 +134,7 @@ def content_config(tmp_path_factory, search_models: SearchModels, default_user:
        user=default_user,
    )

-    text_search.setup(OrgToJsonl, get_sample_data("org"), regenerate=False, user=default_user)
+    text_search.setup(OrgToEntries, get_sample_data("org"), regenerate=False, user=default_user)

    if os.getenv("GITHUB_PAT_TOKEN"):
        GithubConfig.objects.create(
@@ -242,7 +242,7 @@ def client(
    # These lines help us Mock the Search models for these search types
    state.search_models.image_search = image_search.initialize_model(search_config.image)
    text_search.setup(
-        OrgToJsonl,
+        OrgToEntries,
        get_sample_data("org"),
        regenerate=False,
        user=api_user.user,
@@ -251,7 +251,7 @@ def client(
        content_config.image, state.search_models.image_search, regenerate=False
    )
    text_search.setup(
-        PlaintextToJsonl,
+        PlaintextToEntries,
        get_sample_data("plaintext"),
        regenerate=False,
        user=api_user.user,
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -15,7 +15,7 @@ from khoj.utils import state
 from khoj.utils.state import search_models, content_index, config
 from khoj.search_type import text_search, image_search
 from khoj.utils.rawconfig import ContentConfig, SearchConfig
-from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
+from khoj.processor.org_mode.org_to_entries import OrgToEntries
 from database.models import KhojUser
 from database.adapters import EntryAdapters

@@ -176,7 +176,7 @@ def test_regenerate_with_github_fails_without_pat(client):
@pytest.mark.skip(reason="Flaky test on parallel test runs")
 def test_get_configured_types_via_api(client, sample_org_data):
    # Act
-    text_search.setup(OrgToJsonl, sample_org_data, regenerate=False)
+    text_search.setup(OrgToEntries, sample_org_data, regenerate=False)

    enabled_types = EntryAdapters.get_unique_file_types(user=None).all().values_list("file_type", flat=True)

@@ -189,7 +189,7 @@ def test_get_configured_types_via_api(client, sample_org_data):
 def test_get_api_config_types(client, sample_org_data, default_user: KhojUser):
    # Arrange
    headers = {"Authorization": "Bearer kk-secret"}
-    text_search.setup(OrgToJsonl, sample_org_data, regenerate=False, user=default_user)
+    text_search.setup(OrgToEntries, sample_org_data, regenerate=False, user=default_user)

    # Act
    response = client.get(f"/api/config/types", headers=headers)
@@ -255,7 +255,7 @@ def test_image_search(client, content_config: ContentConfig, search_config: Sear
 def test_notes_search(client, search_config: SearchConfig, sample_org_data, default_user: KhojUser):
    # Arrange
    headers = {"Authorization": "Bearer kk-secret"}
-    text_search.setup(OrgToJsonl, sample_org_data, regenerate=False, user=default_user)
+    text_search.setup(OrgToEntries, sample_org_data, regenerate=False, user=default_user)
    user_query = quote("How to git install application?")

    # Act
@@ -276,7 +276,7 @@ def test_notes_search_with_only_filters(
    # Arrange
    headers = {"Authorization": "Bearer kk-secret"}
    text_search.setup(
-        OrgToJsonl,
+        OrgToEntries,
        sample_org_data,
        regenerate=False,
        user=default_user,
@@ -298,7 +298,7 @@ def test_notes_search_with_only_filters(
 def test_notes_search_with_include_filter(client, sample_org_data, default_user: KhojUser):
    # Arrange
    headers = {"Authorization": "Bearer kk-secret"}
-    text_search.setup(OrgToJsonl, sample_org_data, regenerate=False, user=default_user)
+    text_search.setup(OrgToEntries, sample_org_data, regenerate=False, user=default_user)
    user_query = quote('How to git install application? +"Emacs"')

    # Act
@@ -317,7 +317,7 @@ def test_notes_search_with_exclude_filter(client, sample_org_data, default_user:
    # Arrange
    headers = {"Authorization": "Bearer kk-secret"}
    text_search.setup(
-        OrgToJsonl,
+        OrgToEntries,
        sample_org_data,
        regenerate=False,
        user=default_user,
@@ -339,7 +339,7 @@ def test_notes_search_with_exclude_filter(client, sample_org_data, default_user:
 def test_different_user_data_not_accessed(client, sample_org_data, default_user: KhojUser):
    # Arrange
    headers = {"Authorization": "Bearer kk-token"}  # Token for default_user2
-    text_search.setup(OrgToJsonl, sample_org_data, regenerate=False, user=default_user)
+    text_search.setup(OrgToEntries, sample_org_data, regenerate=False, user=default_user)
    user_query = quote("How to git install application?")

    # Act
--- a/tests/test_markdown_to_jsonl.py
+++ b/tests/test_markdown_to_jsonl.py
@@ -4,7 +4,7 @@ from pathlib import Path
 import os

 # Internal Packages
-from khoj.processor.markdown.markdown_to_jsonl import MarkdownToJsonl
+from khoj.processor.markdown.markdown_to_entries import MarkdownToEntries
 from khoj.utils.fs_syncer import get_markdown_files
 from khoj.utils.rawconfig import TextContentConfig

@@ -23,11 +23,11 @@ def test_markdown_file_with_no_headings_to_jsonl(tmp_path):

    # Act
    # Extract Entries from specified Markdown files
-    entry_nodes, file_to_entries = MarkdownToJsonl.extract_markdown_entries(markdown_files=data)
+    entry_nodes, file_to_entries = MarkdownToEntries.extract_markdown_entries(markdown_files=data)

    # Process Each Entry from All Notes Files
-    jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(
-        MarkdownToJsonl.convert_markdown_entries_to_maps(entry_nodes, file_to_entries)
+    jsonl_string = MarkdownToEntries.convert_markdown_maps_to_jsonl(
+        MarkdownToEntries.convert_markdown_entries_to_maps(entry_nodes, file_to_entries)
    )
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

@@ -52,11 +52,11 @@ def test_single_markdown_entry_to_jsonl(tmp_path):

    # Act
    # Extract Entries from specified Markdown files
-    entries, entry_to_file_map = MarkdownToJsonl.extract_markdown_entries(markdown_files=data)
+    entries, entry_to_file_map = MarkdownToEntries.extract_markdown_entries(markdown_files=data)

    # Process Each Entry from All Notes Files
-    jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(
-        MarkdownToJsonl.convert_markdown_entries_to_maps(entries, entry_to_file_map)
+    jsonl_string = MarkdownToEntries.convert_markdown_maps_to_jsonl(
+        MarkdownToEntries.convert_markdown_entries_to_maps(entries, entry_to_file_map)
    )
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

@@ -81,11 +81,11 @@ def test_multiple_markdown_entries_to_jsonl(tmp_path):

    # Act
    # Extract Entries from specified Markdown files
-    entry_strings, entry_to_file_map = MarkdownToJsonl.extract_markdown_entries(markdown_files=data)
-    entries = MarkdownToJsonl.convert_markdown_entries_to_maps(entry_strings, entry_to_file_map)
+    entry_strings, entry_to_file_map = MarkdownToEntries.extract_markdown_entries(markdown_files=data)
+    entries = MarkdownToEntries.convert_markdown_entries_to_maps(entry_strings, entry_to_file_map)

    # Process Each Entry from All Notes Files
-    jsonl_string = MarkdownToJsonl.convert_markdown_maps_to_jsonl(entries)
+    jsonl_string = MarkdownToEntries.convert_markdown_maps_to_jsonl(entries)
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

    # Assert
@@ -144,7 +144,7 @@ def test_extract_entries_with_different_level_headings(tmp_path):

    # Act
    # Extract Entries from specified Markdown files
-    entries, _ = MarkdownToJsonl.extract_markdown_entries(markdown_files=data)
+    entries, _ = MarkdownToEntries.extract_markdown_entries(markdown_files=data)

    # Assert
    assert len(entries) == 2
--- a/tests/test_org_to_jsonl.py
+++ b/tests/test_org_to_jsonl.py
@@ -3,8 +3,8 @@ import json
 import os

 # Internal Packages
-from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
-from khoj.processor.text_to_jsonl import TextEntries
+from khoj.processor.org_mode.org_to_entries import OrgToEntries
+from khoj.processor.text_to_entries import TextToEntries
 from khoj.utils.helpers import is_none_or_empty
 from khoj.utils.rawconfig import Entry
 from khoj.utils.fs_syncer import get_org_files
@@ -29,9 +29,9 @@ def test_configure_heading_entry_to_jsonl(tmp_path):
    for index_heading_entries in [True, False]:
        # Act
        # Extract entries into jsonl from specified Org files
-        jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
-            OrgToJsonl.convert_org_nodes_to_entries(
-                *OrgToJsonl.extract_org_entries(org_files=data), index_heading_entries=index_heading_entries
+        jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(
+            OrgToEntries.convert_org_nodes_to_entries(
+                *OrgToEntries.extract_org_entries(org_files=data), index_heading_entries=index_heading_entries
            )
        )
        jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
@@ -59,12 +59,12 @@ def test_entry_split_when_exceeds_max_words(tmp_path):

    # Act
    # Extract Entries from specified Org files
-    entries, entry_to_file_map = OrgToJsonl.extract_org_entries(org_files=data)
+    entries, entry_to_file_map = OrgToEntries.extract_org_entries(org_files=data)

    # Split each entry from specified Org files by max words
-    jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
-        TextEntries.split_entries_by_max_tokens(
-            OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map), max_tokens=4
+    jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(
+        TextToEntries.split_entries_by_max_tokens(
+            OrgToEntries.convert_org_nodes_to_entries(entries, entry_to_file_map), max_tokens=4
        )
    )
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
@@ -86,7 +86,7 @@ def test_entry_split_drops_large_words():

    # Act
    # Split entry by max words and drop words larger than max word length
-    processed_entry = TextEntries.split_entries_by_max_tokens([entry], max_word_length=5)[0]
+    processed_entry = TextToEntries.split_entries_by_max_tokens([entry], max_word_length=5)[0]

    # Assert
    # "Heading" dropped from compiled version because its over the set max word limit
@@ -109,11 +109,11 @@ def test_entry_with_body_to_jsonl(tmp_path):

    # Act
    # Extract Entries from specified Org files
-    entries, entry_to_file_map = OrgToJsonl.extract_org_entries(org_files=data)
+    entries, entry_to_file_map = OrgToEntries.extract_org_entries(org_files=data)

    # Process Each Entry from All Notes Files
-    jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
-        OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map)
+    jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(
+        OrgToEntries.convert_org_nodes_to_entries(entries, entry_to_file_map)
    )
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

@@ -136,11 +136,11 @@ Intro text

    # Act
    # Extract Entries from specified Org files
-    entry_nodes, file_to_entries = OrgToJsonl.extract_org_entries(org_files=data)
+    entry_nodes, file_to_entries = OrgToEntries.extract_org_entries(org_files=data)

    # Process Each Entry from All Notes Files
-    entries = OrgToJsonl.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
-    jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(entries)
+    entries = OrgToEntries.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
+    jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(entries)
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

    # Assert
@@ -160,11 +160,11 @@ def test_file_with_no_headings_to_jsonl(tmp_path):

    # Act
    # Extract Entries from specified Org files
-    entry_nodes, file_to_entries = OrgToJsonl.extract_org_entries(org_files=data)
+    entry_nodes, file_to_entries = OrgToEntries.extract_org_entries(org_files=data)

    # Process Each Entry from All Notes Files
-    entries = OrgToJsonl.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
-    jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(entries)
+    entries = OrgToEntries.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
+    jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(entries)
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

    # Assert
@@ -224,7 +224,7 @@ def test_extract_entries_with_different_level_headings(tmp_path):

    # Act
    # Extract Entries from specified Org files
-    entries, _ = OrgToJsonl.extract_org_entries(org_files=data)
+    entries, _ = OrgToEntries.extract_org_entries(org_files=data)

    # Assert
    assert len(entries) == 2
--- a/tests/test_pdf_to_jsonl.py
+++ b/tests/test_pdf_to_jsonl.py
@@ -3,7 +3,7 @@ import json
 import os

 # Internal Packages
-from khoj.processor.pdf.pdf_to_jsonl import PdfToJsonl
+from khoj.processor.pdf.pdf_to_entries import PdfToEntries

 from khoj.utils.fs_syncer import get_pdf_files
 from khoj.utils.rawconfig import TextContentConfig
@@ -18,11 +18,11 @@ def test_single_page_pdf_to_jsonl():
        pdf_bytes = f.read()

    data = {"tests/data/pdf/singlepage.pdf": pdf_bytes}
-    entries, entry_to_file_map = PdfToJsonl.extract_pdf_entries(pdf_files=data)
+    entries, entry_to_file_map = PdfToEntries.extract_pdf_entries(pdf_files=data)

    # Process Each Entry from All Pdf Files
-    jsonl_string = PdfToJsonl.convert_pdf_maps_to_jsonl(
-        PdfToJsonl.convert_pdf_entries_to_maps(entries, entry_to_file_map)
+    jsonl_string = PdfToEntries.convert_pdf_maps_to_jsonl(
+        PdfToEntries.convert_pdf_entries_to_maps(entries, entry_to_file_map)
    )
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

@@ -38,11 +38,11 @@ def test_multi_page_pdf_to_jsonl():
        pdf_bytes = f.read()

    data = {"tests/data/pdf/multipage.pdf": pdf_bytes}
-    entries, entry_to_file_map = PdfToJsonl.extract_pdf_entries(pdf_files=data)
+    entries, entry_to_file_map = PdfToEntries.extract_pdf_entries(pdf_files=data)

    # Process Each Entry from All Pdf Files
-    jsonl_string = PdfToJsonl.convert_pdf_maps_to_jsonl(
-        PdfToJsonl.convert_pdf_entries_to_maps(entries, entry_to_file_map)
+    jsonl_string = PdfToEntries.convert_pdf_maps_to_jsonl(
+        PdfToEntries.convert_pdf_entries_to_maps(entries, entry_to_file_map)
    )
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

--- a/tests/test_plaintext_to_jsonl.py
+++ b/tests/test_plaintext_to_jsonl.py
@@ -6,7 +6,7 @@ from pathlib import Path
 # Internal Packages
 from khoj.utils.fs_syncer import get_plaintext_files
 from khoj.utils.rawconfig import TextContentConfig
-from khoj.processor.plaintext.plaintext_to_jsonl import PlaintextToJsonl
+from khoj.processor.plaintext.plaintext_to_entries import PlaintextToEntries
 from database.models import LocalPlaintextConfig, KhojUser


@@ -27,14 +27,14 @@ def test_plaintext_file(tmp_path):
        f"{plaintextfile}": entry,
    }

-    maps = PlaintextToJsonl.convert_plaintext_entries_to_maps(entry_to_file_map=data)
+    maps = PlaintextToEntries.convert_plaintext_entries_to_maps(entry_to_file_map=data)

    # Convert each entry.file to absolute path to make them JSON serializable
    for map in maps:
        map.file = str(Path(map.file).absolute())

    # Process Each Entry from All Notes Files
-    jsonl_string = PlaintextToJsonl.convert_entries_to_jsonl(maps)
+    jsonl_string = PlaintextToEntries.convert_entries_to_jsonl(maps)
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

    # Assert
@@ -100,7 +100,7 @@ def test_parse_html_plaintext_file(content_config, default_user: KhojUser):
    extracted_plaintext_files = get_plaintext_files(config=config)

    # Act
-    maps = PlaintextToJsonl.convert_plaintext_entries_to_maps(extracted_plaintext_files)
+    maps = PlaintextToEntries.convert_plaintext_entries_to_maps(extracted_plaintext_files)

    # Assert
    assert len(maps) == 1
--- a/tests/test_text_search.py
+++ b/tests/test_text_search.py
@@ -10,8 +10,8 @@ import pytest
 # Internal Packages
 from khoj.search_type import text_search
 from khoj.utils.rawconfig import ContentConfig, SearchConfig
-from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
-from khoj.processor.github.github_to_jsonl import GithubToJsonl
+from khoj.processor.org_mode.org_to_entries import OrgToEntries
+from khoj.processor.github.github_to_entries import GithubToEntries
 from khoj.utils.fs_syncer import collect_files, get_org_files
 from database.models import LocalOrgConfig, KhojUser, Entry, GithubConfig

@@ -65,7 +65,7 @@ def test_text_search_setup_with_empty_file_raises_error(
    # Act
    # Generate notes embeddings during asymmetric setup
    with caplog.at_level(logging.INFO):
-        text_search.setup(OrgToJsonl, data, regenerate=True, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)

    assert "Created 0 new embeddings. Deleted 3 embeddings for user " in caplog.records[-1].message
    verify_embeddings(0, default_user)
@@ -80,7 +80,7 @@ def test_text_indexer_deletes_embedding_before_regenerate(
    org_config = LocalOrgConfig.objects.filter(user=default_user).first()
    data = get_org_files(org_config)
    with caplog.at_level(logging.DEBUG):
-        text_search.setup(OrgToJsonl, data, regenerate=True, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)

    # Assert
    assert "Deleting all embeddings for file type org" in caplog.text
@@ -94,7 +94,7 @@ def test_text_search_setup_batch_processes(content_config: ContentConfig, defaul
    org_config = LocalOrgConfig.objects.filter(user=default_user).first()
    data = get_org_files(org_config)
    with caplog.at_level(logging.DEBUG):
-        text_search.setup(OrgToJsonl, data, regenerate=True, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)

    # Assert
    assert "Created 4 new embeddings" in caplog.text
@@ -112,13 +112,13 @@ def test_text_index_same_if_content_unchanged(content_config: ContentConfig, def
    # Act
    # Generate initial notes embeddings during asymmetric setup
    with caplog.at_level(logging.DEBUG):
-        text_search.setup(OrgToJsonl, data, regenerate=True, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)
    initial_logs = caplog.text
    caplog.clear()  # Clear logs

    # Run asymmetric setup again with no changes to data source. Ensure index is not updated
    with caplog.at_level(logging.DEBUG):
-        text_search.setup(OrgToJsonl, data, regenerate=False, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=False, user=default_user)
    final_logs = caplog.text

    # Assert
@@ -148,7 +148,7 @@ async def test_text_search(search_config: SearchConfig):
    await loop.run_in_executor(
        None,
        text_search.setup,
-        OrgToJsonl,
+        OrgToEntries,
        data,
        True,
        True,
@@ -185,7 +185,7 @@ def test_entry_chunking_by_max_tokens(org_config_with_only_new_file: LocalOrgCon
    # Act
    # reload embeddings, entries, notes model after adding new org-mode file
    with caplog.at_level(logging.INFO):
-        text_search.setup(OrgToJsonl, data, regenerate=False, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=False, user=default_user)

    # Assert
    # verify newly added org-mode entry is split by max tokens
@@ -218,7 +218,7 @@ conda activate khoj
 #+end_src"""
    }
    text_search.setup(
-        OrgToJsonl,
+        OrgToEntries,
        data,
        regenerate=False,
        user=default_user,
@@ -237,7 +237,7 @@ conda activate khoj
    # reload embeddings, entries, notes model after adding new org-mode file
    with caplog.at_level(logging.INFO):
        text_search.setup(
-            OrgToJsonl,
+            OrgToEntries,
            data,
            regenerate=False,
            full_corpus=False,
@@ -259,7 +259,7 @@ def test_regenerate_index_with_new_entry(
    data = get_org_files(org_config)

    with caplog.at_level(logging.INFO):
-        text_search.setup(OrgToJsonl, data, regenerate=True, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)

    assert "Created 10 new embeddings. Deleted 3 embeddings for user " in caplog.records[-1].message

@@ -273,7 +273,7 @@ def test_regenerate_index_with_new_entry(
    # Act
    # regenerate notes jsonl, model embeddings and model to include entry from new file
    with caplog.at_level(logging.INFO):
-        text_search.setup(OrgToJsonl, data, regenerate=True, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)

    # Assert
    assert "Created 11 new embeddings. Deleted 10 embeddings for user " in caplog.records[-1].message
@@ -298,7 +298,7 @@ def test_update_index_with_duplicate_entries_in_stable_order(
    # Act
    # generate embeddings, entries, notes model from scratch after adding new org-mode file
    with caplog.at_level(logging.INFO):
-        text_search.setup(OrgToJsonl, data, regenerate=True, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)
    initial_logs = caplog.text
    caplog.clear()  # Clear logs

@@ -306,7 +306,7 @@ def test_update_index_with_duplicate_entries_in_stable_order(

    # update embeddings, entries, notes model with no new changes
    with caplog.at_level(logging.INFO):
-        text_search.setup(OrgToJsonl, data, regenerate=False, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=False, user=default_user)
    final_logs = caplog.text

    # Assert
@@ -331,7 +331,7 @@ def test_update_index_with_deleted_entry(org_config_with_only_new_file: LocalOrg

    # load embeddings, entries, notes model after adding new org file with 2 entries
    with caplog.at_level(logging.INFO):
-        text_search.setup(OrgToJsonl, data, regenerate=True, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)
    initial_logs = caplog.text
    caplog.clear()  # Clear logs

@@ -343,7 +343,7 @@ def test_update_index_with_deleted_entry(org_config_with_only_new_file: LocalOrg

    # Act
    with caplog.at_level(logging.INFO):
-        text_search.setup(OrgToJsonl, data, regenerate=False, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=False, user=default_user)
    final_logs = caplog.text

    # Assert
@@ -361,7 +361,7 @@ def test_update_index_with_new_entry(content_config: ContentConfig, new_org_file
    org_config = LocalOrgConfig.objects.filter(user=default_user).first()
    data = get_org_files(org_config)
    with caplog.at_level(logging.INFO):
-        text_search.setup(OrgToJsonl, data, regenerate=True, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)
    initial_logs = caplog.text
    caplog.clear()  # Clear logs

@@ -375,7 +375,7 @@ def test_update_index_with_new_entry(content_config: ContentConfig, new_org_file
    # Act
    # update embeddings, entries with the newly added note
    with caplog.at_level(logging.INFO):
-        text_search.setup(OrgToJsonl, data, regenerate=False, user=default_user)
+        text_search.setup(OrgToEntries, data, regenerate=False, user=default_user)
    final_logs = caplog.text

    # Assert
@@ -393,7 +393,7 @@ def test_text_search_setup_github(content_config: ContentConfig, default_user: K
    # Act
    # Regenerate github embeddings to test asymmetric setup without caching
    text_search.setup(
-        GithubToJsonl,
+        GithubToEntries,
        {},
        regenerate=True,
        user=default_user,