From 5c4d41d30080a6c4506a86354881b0f57dc32292 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 23 Jun 2025 15:42:58 -0700 Subject: [PATCH] Reduce structural changes to indexed raw org mode entries Reducing structural changes to raw entries allows better deep-linking and re-annotation. This is currently done via the line number in the new uri field. Only add properties drawer to raw entry if entry has properties. Previously line and source properties were inserted into raw entries. This isn't done anymore. Line, source are deprecated for use in khoj.el. --- .../processor/content/org_mode/orgnode.py | 21 +++++++------------ tests/test_org_to_entries.py | 6 +----- 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/src/khoj/processor/content/org_mode/orgnode.py b/src/khoj/processor/content/org_mode/orgnode.py index f81c6e1b..e190e17a 100644 --- a/src/khoj/processor/content/org_mode/orgnode.py +++ b/src/khoj/processor/content/org_mode/orgnode.py @@ -66,7 +66,7 @@ def makelist(file, filename) -> List["Orgnode"]: ctr = 0 if type(file) == str: - f = file.split("\n") + f = file.splitlines() else: f = file @@ -121,7 +121,7 @@ def makelist(file, filename) -> List["Orgnode"]: heading = heading_search.group(2) bodytext = "" tags = list() # set of all tags in headline - tag_search = re.search(r"(.*?)\s*:([a-zA-Z0-9].*?):$", heading) + tag_search = re.search(r"(.*?)\s+:([a-zA-Z0-9@_].*?):\s*$", heading) if tag_search: heading = tag_search.group(1) parsedtags = tag_search.group(2) @@ -260,14 +260,6 @@ def makelist(file, filename) -> List["Orgnode"]: # Prefix filepath/title to ancestors n.ancestors = [file_title] + n.ancestors - # Set SOURCE property to a file+heading based org-mode link to the entry - if n.level == 0: - n.properties["LINE"] = f"file:{normalize_filename(filename)}::0" - n.properties["SOURCE"] = f"[[file:{normalize_filename(filename)}]]" - else: - escaped_heading = n.heading.replace("[", "\\[").replace("]", "\\]") - n.properties["SOURCE"] = 
f"[[file:{normalize_filename(filename)}::*{escaped_heading}]]" - return nodelist @@ -520,10 +512,11 @@ class Orgnode(object): n = n + "\n" # Output Property Drawer - n = n + indent + ":PROPERTIES:\n" - for key, value in self._properties.items(): - n = n + indent + f":{key}: {value}\n" - n = n + indent + ":END:\n" + if self._properties: + n = n + indent + ":PROPERTIES:\n" + for key, value in self._properties.items(): + n = n + indent + f":{key}: {value}\n" + n = n + indent + ":END:\n" # Output Body if self.hasBody: diff --git a/tests/test_org_to_entries.py b/tests/test_org_to_entries.py index a84fe6e8..5c11a6fd 100644 --- a/tests/test_org_to_entries.py +++ b/tests/test_org_to_entries.py @@ -147,12 +147,10 @@ body line 1.1 # Extract Entries from specified Org files extracted_entries = OrgToEntries.extract_org_entries(org_files=data, max_tokens=12) assert len(extracted_entries) == 2 - for entry in extracted_entries[1]: - entry.raw = clean(entry.raw) # Assert assert len(extracted_entries[1]) == 1 - assert entry.raw == expected_entry + assert extracted_entries[1][-1].raw == expected_entry def test_parse_org_entry_with_children_as_single_entry_if_small(tmp_path): @@ -388,8 +386,6 @@ def test_extract_entries_with_different_level_headings(tmp_path): # Extract Entries from specified Org files entries = OrgToEntries.extract_org_entries(org_files=data, index_heading_entries=True, max_tokens=3) assert len(entries) == 2 - for entry in entries[1]: - entry.raw = clean(f"{entry.raw}") # Assert assert len(entries[1]) == 2