mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-06 21:29:12 +00:00
Index intro text before headings in org files
- Text before headings was not being indexed due to buggy orgnode parsing logic
- Resolved indexing intro text from files with and without headings in them
- Ensure intro text node has heading set to all title lines collected from the file

Resolves #165
This commit is contained in:
@@ -108,6 +108,30 @@ def test_entry_with_body_to_jsonl(tmp_path):
|
||||
assert len(jsonl_data) == 1
|
||||
|
||||
|
||||
def test_file_with_entry_after_intro_text_to_jsonl(tmp_path):
|
||||
"Ensure intro text before any headings is indexed."
|
||||
# Arrange
|
||||
entry = f"""
|
||||
Intro text
|
||||
|
||||
* Entry Heading
|
||||
entry body
|
||||
"""
|
||||
orgfile = create_file(tmp_path, entry)
|
||||
|
||||
# Act
|
||||
# Extract Entries from specified Org files
|
||||
entry_nodes, file_to_entries = OrgToJsonl.extract_org_entries(org_files=[orgfile])
|
||||
|
||||
# Process Each Entry from All Notes Files
|
||||
entries = OrgToJsonl.convert_org_nodes_to_entries(entry_nodes, file_to_entries)
|
||||
jsonl_string = OrgToJsonl.convert_org_entries_to_jsonl(entries)
|
||||
jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
|
||||
|
||||
# Assert
|
||||
assert len(jsonl_data) == 2
|
||||
|
||||
|
||||
def test_file_with_no_headings_to_jsonl(tmp_path):
|
||||
"Ensure files with no heading, only body text are loaded."
|
||||
# Arrange
|
||||
|
||||
@@ -268,7 +268,7 @@ def test_parse_entry_with_multiple_titles_and_no_headings(tmp_path):
|
||||
# Arrange
|
||||
entry = f"""#+TITLE: title1
|
||||
Body Line 1
|
||||
#+TITLE: title2 """
|
||||
#+TITLE: title2 """
|
||||
orgfile = create_file(tmp_path, entry)
|
||||
|
||||
# Act
|
||||
@@ -286,6 +286,50 @@ Body Line 1
|
||||
assert entries[0].deadline == ""
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_parse_org_with_intro_text_before_heading(tmp_path):
|
||||
"Test parsing of org file with intro text before heading"
|
||||
# Arrange
|
||||
body = f"""#+TITLE: Title
|
||||
intro body
|
||||
* Entry Heading
|
||||
entry body
|
||||
"""
|
||||
orgfile = create_file(tmp_path, body)
|
||||
|
||||
# Act
|
||||
entries = orgnode.makelist(orgfile)
|
||||
|
||||
# Assert
|
||||
assert len(entries) == 2
|
||||
assert entries[0].heading == "Title"
|
||||
assert entries[0].body == "intro body\n"
|
||||
assert entries[1].heading == "Entry Heading"
|
||||
assert entries[1].body == "entry body\n"
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_parse_org_with_intro_text_multiple_titles_and_heading(tmp_path):
|
||||
"Test parsing of org file with intro text, multiple titles and heading entry"
|
||||
# Arrange
|
||||
body = f"""#+TITLE: Title1
|
||||
intro body
|
||||
* Entry Heading
|
||||
entry body
|
||||
#+TITLE: Title2 """
|
||||
orgfile = create_file(tmp_path, body)
|
||||
|
||||
# Act
|
||||
entries = orgnode.makelist(orgfile)
|
||||
|
||||
# Assert
|
||||
assert len(entries) == 2
|
||||
assert entries[0].heading == "Title1 Title2"
|
||||
assert entries[0].body == "intro body\n"
|
||||
assert entries[1].heading == "Entry Heading"
|
||||
assert entries[1].body == "entry body\n"
|
||||
|
||||
|
||||
# Helper Functions
|
||||
def create_file(tmp_path, entry, filename="test.org"):
|
||||
org_file = tmp_path / f"notes/{filename}"
|
||||
|
||||
Reference in New Issue
Block a user