Rename files and classes from X_To_JSONL to the more appropriate X_To_Entries

These content processors now convert content into entries in the DB rather than entries in a JSONL file.
2026-03-06 13:22:12 +00:00 · 2023-11-01 14:51:33 -07:00
parent 2ad2055bcb
commit d92a2d03a7
16 changed files with 127 additions and 125 deletions
--- a/tests/test_org_to_jsonl.py
+++ b/tests/test_org_to_jsonl.py
@@ -3,8 +3,8 @@ import json
 import os

 # Internal Packages
-from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
-from khoj.processor.text_to_jsonl import TextEntries
+from khoj.processor.org_mode.org_to_entries import OrgToEntries
+from khoj.processor.text_to_entries import TextToEntries
 from khoj.utils.helpers import is_none_or_empty
 from khoj.utils.rawconfig import Entry
 from khoj.utils.fs_syncer import get_org_files
@@ -29,9 +29,9 @@ def test_configure_heading_entry_to_jsonl(tmp_path):
    for index_heading_entries in [True, False]:
        # Act
        # Extract entries into jsonl from specified Org files
-        jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
-            OrgToJsonl.convert_org_nodes_to_entries(
-                *OrgToJsonl.extract_org_entries(org_files=data), index_heading_entries=index_heading_entries
+        jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(
+            OrgToEntries.convert_org_nodes_to_entries(
+                *OrgToEntries.extract_org_entries(org_files=data), index_heading_entries=index_heading_entries
            )
        )
        jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
@@ -59,12 +59,12 @@ def test_entry_split_when_exceeds_max_words(tmp_path):

    # Act
    # Extract Entries from specified Org files
-    entries, entry_to_file_map = OrgToJsonl.extract_org_entries(org_files=data)
+    entries, entry_to_file_map = OrgToEntries.extract_org_entries(org_files=data)

    # Split each entry from specified Org files by max words
-    jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
-        TextEntries.split_entries_by_max_tokens(
-            OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map), max_tokens=4
+    jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(
+        TextToEntries.split_entries_by_max_tokens(
+            OrgToEntries.convert_org_nodes_to_entries(entries, entry_to_file_map), max_tokens=4
        )
    )
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
@@ -86,7 +86,7 @@ def test_entry_split_drops_large_words():

    # Act
    # Split entry by max words and drop words larger than max word length
-    processed_entry = TextEntries.split_entries_by_max_tokens([entry], max_word_length=5)[0]
+    processed_entry = TextToEntries.split_entries_by_max_tokens([entry], max_word_length=5)[0]

    # Assert
    # "Heading" dropped from compiled version because its over the set max word limit
@@ -109,11 +109,11 @@ def test_entry_with_body_to_jsonl(tmp_path):

    # Act
    # Extract Entries from specified Org files
-    entries, entry_to_file_map = OrgToJsonl.extract_org_entries(org_files=data)
+    entries, entry_to_file_map = OrgToEntries.extract_org_entries(org_files=data)

    # Process Each Entry from All Notes Files
-    jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
-        OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map)
+    jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(
+        OrgToEntries.convert_org_nodes_to_entries(entries, entry_to_file_map)
    )
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

@@ -136,11 +136,11 @@ Intro text

    # Act
    # Extract Entries from specified Org files
-    entry_nodes, file_to_entries = OrgToJsonl.extract_org_entries(org_files=data)
+    entry_nodes, file_to_entries = OrgToEntries.extract_org_entries(org_files=data)

    # Process Each Entry from All Notes Files
-    entries = OrgToJsonl.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
-    jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(entries)
+    entries = OrgToEntries.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
+    jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(entries)
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

    # Assert
@@ -160,11 +160,11 @@ def test_file_with_no_headings_to_jsonl(tmp_path):

    # Act
    # Extract Entries from specified Org files
-    entry_nodes, file_to_entries = OrgToJsonl.extract_org_entries(org_files=data)
+    entry_nodes, file_to_entries = OrgToEntries.extract_org_entries(org_files=data)

    # Process Each Entry from All Notes Files
-    entries = OrgToJsonl.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
-    jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(entries)
+    entries = OrgToEntries.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
+    jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(entries)
    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]

    # Assert
@@ -224,7 +224,7 @@ def test_extract_entries_with_different_level_headings(tmp_path):

    # Act
    # Extract Entries from specified Org files
-    entries, _ = OrgToJsonl.extract_org_entries(org_files=data)
+    entries, _ = OrgToEntries.extract_org_entries(org_files=data)

    # Assert
    assert len(entries) == 2