Files
khoj/tests/test_orgnode.py
Debanjum dcfa4288c4 Deep link to org-mode entries. Deep link by line number in uri
Use url fragment schema for deep link URIs, borrowing from URL/PDF
schemas. E.g file:///path/to/file.txt#line=<line_no>&#page=<page_no>

Compute line number during (recursive) org-mode entry chunking.

Thoroughly test line number in URI maps to line number of chunk in
actual org mode file.

This deeplink URI with line number is passed to llm as context to
better combine with line range based view file tool.

Grep tool already passed matching line number. This change passes
line number in URIs of org entries matched by the semantic search tool
2025-07-03 17:38:34 -07:00

403 lines
13 KiB
Python

import datetime
from khoj.processor.content.org_mode import orgnode
# Test
# ----------------------------------------------------------------------------------------------------
def test_parse_entry_with_no_headings(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""Body Line 1"""
orgfile = create_file(tmp_path, entry)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 1
assert entries[0].heading == f"{orgfile}"
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1"
assert entries[0].priority == ""
assert entries[0].Property("ID") == ""
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
# ----------------------------------------------------------------------------------------------------
def test_parse_minimal_entry(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""
* Heading
Body Line 1"""
orgfile = create_file(tmp_path, entry)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 1
assert entries[0].heading == "Heading"
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1\n\n"
assert entries[0].priority == ""
assert entries[0].Property("ID") == ""
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
# ----------------------------------------------------------------------------------------------------
def test_parse_complete_entry(tmp_path):
"Test parsing of entry with all important fields"
# Arrange
entry = f"""
*** DONE [#A] Heading :Tag1:TAG2:tag3:
CLOSED: [1984-04-01 Sun 12:00] SCHEDULED: <1984-04-01 Sun 09:00> DEADLINE: <1984-04-01 Sun>
:PROPERTIES:
:ID: 123-456-789-4234-1231
:END:
:LOGBOOK:
CLOCK: [1984-04-01 Sun 09:00]--[1984-04-01 Sun 12:00] => 3:00
- Clocked Log 1
:END:
Body Line 1
Body Line 2"""
orgfile = create_file(tmp_path, entry)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 1
assert entries[0].heading == "Heading"
assert entries[0].todo == "DONE"
assert entries[0].tags == ["Tag1", "TAG2", "tag3"]
assert entries[0].body == "- Clocked Log 1\n\nBody Line 1\n\nBody Line 2\n\n"
assert entries[0].priority == "A"
assert entries[0].Property("ID") == "id:123-456-789-4234-1231"
assert entries[0].closed == datetime.date(1984, 4, 1)
assert entries[0].scheduled == datetime.date(1984, 4, 1)
assert entries[0].deadline == datetime.date(1984, 4, 1)
assert entries[0].logbook == [(datetime.datetime(1984, 4, 1, 9, 0, 0), datetime.datetime(1984, 4, 1, 12, 0, 0))]
# ----------------------------------------------------------------------------------------------------
def test_render_entry_with_property_drawer_and_empty_body(tmp_path):
"Render heading entry with property drawer"
# Arrange
entry_to_render = f"""
*** [#A] Heading1 :tag1:
:PROPERTIES:
:ID: 111-111-111-1111-1111
:END:
\t\r \n
"""
orgfile = create_file(tmp_path, entry_to_render)
expected_entry = f"""*** [#A] Heading1 :tag1:
:PROPERTIES:
:LINE: file://{orgfile}#line=2
:ID: id:111-111-111-1111-1111
:END:
"""
# Act
parsed_entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert f"{parsed_entries[0]}" == expected_entry
# ----------------------------------------------------------------------------------------------------
def test_all_links_to_entry_rendered(tmp_path):
"Ensure all links to entry rendered in property drawer from entry"
# Arrange
entry = f"""
*** [#A] Heading :tag1:
:PROPERTIES:
:ID: 123-456-789-4234-1231
:END:
Body Line 1
*** Heading2
Body Line 2
"""
orgfile = create_file(tmp_path, entry)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
# SOURCE link rendered with Heading
# ID link rendered with ID
assert f":ID: id:123-456-789-4234-1231" in f"{entries[0]}"
# LINE link rendered with line number
assert f":LINE: file://{orgfile}#line=2" in f"{entries[0]}"
# LINE link rendered with line number
assert f":LINE: file://{orgfile}#line=7" in f"{entries[1]}"
# ----------------------------------------------------------------------------------------------------
def test_parse_multiple_entries(tmp_path):
"Test parsing of multiple entries"
# Arrange
content = f"""
*** FAILED [#A] Heading1 :tag1:
CLOSED: [1984-04-01 Sun 12:00] SCHEDULED: <1984-04-01 Sun 09:00> DEADLINE: <1984-04-01 Sun>
:PROPERTIES:
:ID: 123-456-789-4234-0001
:END:
:LOGBOOK:
CLOCK: [1984-04-01 Sun 09:00]--[1984-04-01 Sun 12:00] => 3:00
- Clocked Log 1
:END:
Body 1
*** CANCELLED [#A] Heading2 :tag2:
CLOSED: [1984-04-02 Sun 12:00] SCHEDULED: <1984-04-02 Sun 09:00> DEADLINE: <1984-04-02 Sun>
:PROPERTIES:
:ID: 123-456-789-4234-0002
:END:
:LOGBOOK:
CLOCK: [1984-04-02 Mon 09:00]--[1984-04-02 Mon 12:00] => 3:00
- Clocked Log 2
:END:
Body 2
"""
orgfile = create_file(tmp_path, content)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 2
for index, entry in enumerate(entries):
assert entry.heading == f"Heading{index+1}"
assert entry.todo == "FAILED" if index == 0 else "CANCELLED"
assert entry.tags == [f"tag{index+1}"]
assert entry.body == f"- Clocked Log {index+1}\n\nBody {index+1}\n\n"
assert entry.priority == "A"
assert entry.Property("ID") == f"id:123-456-789-4234-000{index+1}"
assert entry.closed == datetime.date(1984, 4, index + 1)
assert entry.scheduled == datetime.date(1984, 4, index + 1)
assert entry.deadline == datetime.date(1984, 4, index + 1)
assert entry.logbook == [
(datetime.datetime(1984, 4, index + 1, 9, 0, 0), datetime.datetime(1984, 4, index + 1, 12, 0, 0))
]
# ----------------------------------------------------------------------------------------------------
def test_parse_entry_with_empty_title(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""#+TITLE:
Body Line 1"""
orgfile = create_file(tmp_path, entry)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 1
assert entries[0].heading == f"{orgfile}"
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1"
assert entries[0].priority == ""
assert entries[0].Property("ID") == ""
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
# ----------------------------------------------------------------------------------------------------
def test_parse_entry_with_title_and_no_headings(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""#+TITLE: test
Body Line 1"""
orgfile = create_file(tmp_path, entry)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 1
assert entries[0].heading == "test"
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1"
assert entries[0].priority == ""
assert entries[0].Property("ID") == ""
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
assert entries[0].ancestors == ["test"]
# ----------------------------------------------------------------------------------------------------
def test_parse_entry_with_multiple_titles_and_no_headings(tmp_path):
"Test parsing of entry with minimal fields"
# Arrange
entry = f"""#+TITLE: title1
Body Line 1
#+TITLE: title2 """
orgfile = create_file(tmp_path, entry)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 1
assert entries[0].heading == "title1 title2"
assert entries[0].tags == list()
assert entries[0].body == "Body Line 1\n"
assert entries[0].priority == ""
assert entries[0].Property("ID") == ""
assert entries[0].closed == ""
assert entries[0].scheduled == ""
assert entries[0].deadline == ""
assert entries[0].ancestors == ["title1 title2"]
# ----------------------------------------------------------------------------------------------------
def test_parse_org_with_intro_text_before_heading(tmp_path):
"Test parsing of org file with intro text before heading"
# Arrange
body = f"""#+TITLE: Title
intro body
* Entry Heading
entry body
"""
orgfile = create_file(tmp_path, body)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 2
assert entries[0].heading == "Title"
assert entries[0].body == "intro body\n"
assert entries[0].ancestors == ["Title"]
assert entries[1].heading == "Entry Heading"
assert entries[1].body == "entry body\n\n"
assert entries[1].ancestors == ["Title"]
# ----------------------------------------------------------------------------------------------------
def test_parse_org_with_intro_text_multiple_titles_and_heading(tmp_path):
"Test parsing of org file with intro text, multiple titles and heading entry"
# Arrange
body = f"""#+TITLE: Title1
intro body
* Entry Heading
entry body
#+TITLE: Title2 """
orgfile = create_file(tmp_path, body)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 2
assert entries[0].heading == "Title1 Title2"
assert entries[0].body == "intro body\n"
assert entries[0].ancestors == ["Title1 Title2"]
assert entries[1].heading == "Entry Heading"
assert entries[1].body == "entry body\n\n"
assert entries[0].ancestors == ["Title1 Title2"]
# ----------------------------------------------------------------------------------------------------
def test_parse_org_with_single_ancestor_heading(tmp_path):
"Parse org entries with parent headings context"
# Arrange
body = f"""
* Heading 1
body 1
** Sub Heading 1
"""
orgfile = create_file(tmp_path, body)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 2
assert entries[0].heading == "Heading 1"
assert entries[0].ancestors == [f"{orgfile}"]
assert entries[1].heading == "Sub Heading 1"
assert entries[1].ancestors == [f"{orgfile}", "Heading 1"]
# ----------------------------------------------------------------------------------------------------
def test_parse_org_with_multiple_ancestor_headings(tmp_path):
"Parse org entries with parent headings context"
# Arrange
body = f"""
* Heading 1
body 1
** Sub Heading 1
*** Sub Sub Heading 1
sub sub body 1
"""
orgfile = create_file(tmp_path, body)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 3
assert entries[0].heading == "Heading 1"
assert entries[0].ancestors == [f"{orgfile}"]
assert entries[1].heading == "Sub Heading 1"
assert entries[1].ancestors == [f"{orgfile}", "Heading 1"]
assert entries[2].heading == "Sub Sub Heading 1"
assert entries[2].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 1"]
# ----------------------------------------------------------------------------------------------------
def test_parse_org_with_multiple_ancestor_headings_of_siblings(tmp_path):
"Parse org entries with parent headings context"
# Arrange
body = f"""
* Heading 1
body 1
** Sub Heading 1
*** Sub Sub Heading 1
sub sub body 1
*** Sub Sub Heading 2
** Sub Heading 2
*** Sub Sub Heading 3
"""
orgfile = create_file(tmp_path, body)
# Act
entries = orgnode.makelist_with_filepath(orgfile)
# Assert
assert len(entries) == 6
assert entries[0].heading == "Heading 1"
assert entries[0].ancestors == [f"{orgfile}"]
assert entries[1].heading == "Sub Heading 1"
assert entries[1].ancestors == [f"{orgfile}", "Heading 1"]
assert entries[2].heading == "Sub Sub Heading 1"
assert entries[2].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 1"]
assert entries[3].heading == "Sub Sub Heading 2"
assert entries[3].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 1"]
assert entries[4].heading == "Sub Heading 2"
assert entries[4].ancestors == [f"{orgfile}", "Heading 1"]
assert entries[5].heading == "Sub Sub Heading 3"
assert entries[5].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 2"]
# Helper Functions
def create_file(tmp_path, entry, filename="test.org"):
org_file = tmp_path / f"notes/{filename}"
org_file.parent.mkdir()
org_file.touch()
org_file.write_text(entry)
return org_file