Rename Files, Classes from X_To_JSONL to more appropriate X_To_Entries

These content processors are converting content into entries in DB
instead of entries in JSONL file
This commit is contained in:
Debanjum Singh Solanky
2023-11-01 14:51:33 -07:00
parent 2ad2055bcb
commit d92a2d03a7
16 changed files with 127 additions and 125 deletions

View File

@@ -3,8 +3,8 @@ import json
import os
# Internal Packages
from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
from khoj.processor.text_to_jsonl import TextEntries
from khoj.processor.org_mode.org_to_entries import OrgToEntries
from khoj.processor.text_to_entries import TextToEntries
from khoj.utils.helpers import is_none_or_empty
from khoj.utils.rawconfig import Entry
from khoj.utils.fs_syncer import get_org_files
@@ -29,9 +29,9 @@ def test_configure_heading_entry_to_jsonl(tmp_path):
for index_heading_entries in [True, False]:
# Act
# Extract entries into jsonl from specified Org files
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
OrgToJsonl.convert_org_nodes_to_entries(
*OrgToJsonl.extract_org_entries(org_files=data), index_heading_entries=index_heading_entries
jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(
OrgToEntries.convert_org_nodes_to_entries(
*OrgToEntries.extract_org_entries(org_files=data), index_heading_entries=index_heading_entries
)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
@@ -59,12 +59,12 @@ def test_entry_split_when_exceeds_max_words(tmp_path):
# Act
# Extract Entries from specified Org files
entries, entry_to_file_map = OrgToJsonl.extract_org_entries(org_files=data)
entries, entry_to_file_map = OrgToEntries.extract_org_entries(org_files=data)
# Split each entry from specified Org files by max words
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
TextEntries.split_entries_by_max_tokens(
OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map), max_tokens=4
jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(
TextToEntries.split_entries_by_max_tokens(
OrgToEntries.convert_org_nodes_to_entries(entries, entry_to_file_map), max_tokens=4
)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
@@ -86,7 +86,7 @@ def test_entry_split_drops_large_words():
# Act
# Split entry by max words and drop words larger than max word length
processed_entry = TextEntries.split_entries_by_max_tokens([entry], max_word_length=5)[0]
processed_entry = TextToEntries.split_entries_by_max_tokens([entry], max_word_length=5)[0]
# Assert
# "Heading" dropped from compiled version because its over the set max word limit
@@ -109,11 +109,11 @@ def test_entry_with_body_to_jsonl(tmp_path):
# Act
# Extract Entries from specified Org files
entries, entry_to_file_map = OrgToJsonl.extract_org_entries(org_files=data)
entries, entry_to_file_map = OrgToEntries.extract_org_entries(org_files=data)
# Process Each Entry from All Notes Files
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map)
jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(
OrgToEntries.convert_org_nodes_to_entries(entries, entry_to_file_map)
)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
@@ -136,11 +136,11 @@ Intro text
# Act
# Extract Entries from specified Org files
entry_nodes, file_to_entries = OrgToJsonl.extract_org_entries(org_files=data)
entry_nodes, file_to_entries = OrgToEntries.extract_org_entries(org_files=data)
# Process Each Entry from All Notes Files
entries = OrgToJsonl.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(entries)
entries = OrgToEntries.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(entries)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
@@ -160,11 +160,11 @@ def test_file_with_no_headings_to_jsonl(tmp_path):
# Act
# Extract Entries from specified Org files
entry_nodes, file_to_entries = OrgToJsonl.extract_org_entries(org_files=data)
entry_nodes, file_to_entries = OrgToEntries.extract_org_entries(org_files=data)
# Process Each Entry from All Notes Files
entries = OrgToJsonl.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(entries)
entries = OrgToEntries.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
jsonl_string = OrgToEntries.convert_org_entries_to_jsonl(entries)
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
# Assert
@@ -224,7 +224,7 @@ def test_extract_entries_with_different_level_headings(tmp_path):
# Act
# Extract Entries from specified Org files
entries, _ = OrgToJsonl.extract_org_entries(org_files=data)
entries, _ = OrgToEntries.extract_org_entries(org_files=data)
# Assert
assert len(entries) == 2