mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Remove unused Entry to Jsonl converter from text to entry class, tests
This was used earlier, when the index was a plaintext JSONL file. Now that documents are indexed in a DB, this function is no longer required. Simplify the org, md, pdf, and plaintext to entries tests by removing the entry to JSONL conversion step.
This commit is contained in:
@@ -23,18 +23,14 @@ def test_markdown_file_with_no_headings_to_jsonl(tmp_path):
|
||||
# Extract Entries from specified Markdown files
|
||||
entries = MarkdownToEntries.extract_markdown_entries(markdown_files=data)
|
||||
|
||||
# Process Each Entry from All Notes Files
|
||||
jsonl_string = MarkdownToEntries.convert_markdown_maps_to_jsonl(entries)
|
||||
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
|
||||
|
||||
# Assert
|
||||
assert len(jsonl_data) == 1
|
||||
assert len(entries) == 1
|
||||
# Ensure raw entry with no headings do not get heading prefix prepended
|
||||
assert not jsonl_data[0]["raw"].startswith("#")
|
||||
assert not entries[0].raw.startswith("#")
|
||||
# Ensure compiled entry has filename prepended as top level heading
|
||||
assert expected_heading in jsonl_data[0]["compiled"]
|
||||
assert entries[0].compiled.startswith(expected_heading)
|
||||
# Ensure compiled entry also includes the file name
|
||||
assert str(tmp_path) in jsonl_data[0]["compiled"]
|
||||
assert str(tmp_path) in entries[0].compiled
|
||||
|
||||
|
||||
def test_single_markdown_entry_to_jsonl(tmp_path):
|
||||
@@ -52,12 +48,8 @@ def test_single_markdown_entry_to_jsonl(tmp_path):
|
||||
# Extract Entries from specified Markdown files
|
||||
entries = MarkdownToEntries.extract_markdown_entries(markdown_files=data)
|
||||
|
||||
# Process Each Entry from All Notes Files
|
||||
jsonl_string = MarkdownToEntries.convert_markdown_maps_to_jsonl(entries)
|
||||
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
|
||||
|
||||
# Assert
|
||||
assert len(jsonl_data) == 1
|
||||
assert len(entries) == 1
|
||||
|
||||
|
||||
def test_multiple_markdown_entries_to_jsonl(tmp_path):
|
||||
@@ -79,12 +71,8 @@ def test_multiple_markdown_entries_to_jsonl(tmp_path):
|
||||
# Extract Entries from specified Markdown files
|
||||
entries = MarkdownToEntries.extract_markdown_entries(markdown_files=data)
|
||||
|
||||
# Process Each Entry from All Notes Files
|
||||
jsonl_string = MarkdownToEntries.convert_markdown_maps_to_jsonl(entries)
|
||||
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
|
||||
|
||||
# Assert
|
||||
assert len(jsonl_data) == 2
|
||||
assert len(entries) == 2
|
||||
# Ensure entry compiled strings include the markdown files they originate from
|
||||
assert all([tmp_path.stem in entry.compiled for entry in entries])
|
||||
|
||||
|
||||
Reference in New Issue
Block a user