mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Set filename as top heading of org entries for better search context
Previously, the filename was only being appended to markdown entries. Test that the filename gets prepended to the compiled entry as a heading.
This commit is contained in:
@@ -47,6 +47,7 @@ def test_entry_split_when_exceeds_max_words(tmp_path):
|
||||
Body Line
|
||||
"""
|
||||
orgfile = create_file(tmp_path, entry)
|
||||
expected_heading = f"* {orgfile.stem}\n** Heading"
|
||||
|
||||
# Act
|
||||
# Extract Entries from specified Org files
|
||||
@@ -55,7 +56,7 @@ def test_entry_split_when_exceeds_max_words(tmp_path):
|
||||
# Split each entry from specified Org files by max words
|
||||
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(
|
||||
TextToJsonl.split_entries_by_max_tokens(
|
||||
OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map), max_tokens=2
|
||||
OrgToJsonl.convert_org_nodes_to_entries(entries, entry_to_file_map), max_tokens=4
|
||||
)
|
||||
)
|
||||
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
|
||||
@@ -63,7 +64,7 @@ def test_entry_split_when_exceeds_max_words(tmp_path):
|
||||
# Assert
|
||||
assert len(jsonl_data) == 2
|
||||
# Ensure compiled entries split by max_words start with entry heading (for search context)
|
||||
assert all(entry["compiled"].startswith("Heading") for entry in jsonl_data)
|
||||
assert all([entry["compiled"].startswith(expected_heading) for entry in jsonl_data])
|
||||
|
||||
|
||||
def test_entry_split_drops_large_words():
|
||||
|
||||
Reference in New Issue
Block a user