Reduce structural changes to indexed raw org mode entries

Reducing structural changes to the raw entry allows better deep-linking
and re-annotation. This is currently done via the line number in the new
uri field.

Only add a properties drawer to the raw entry if the entry has properties.
Previously, the line and source properties were inserted into raw entries.
This isn't done anymore. The line and source properties are deprecated for
use in khoj.el.
This commit is contained in:
Debanjum
2025-06-23 15:42:58 -07:00
parent 870d9d851a
commit 5c4d41d300
2 changed files with 8 additions and 19 deletions

View File

@@ -66,7 +66,7 @@ def makelist(file, filename) -> List["Orgnode"]:
ctr = 0
if type(file) == str:
f = file.split("\n")
f = file.splitlines()
else:
f = file
@@ -121,7 +121,7 @@ def makelist(file, filename) -> List["Orgnode"]:
heading = heading_search.group(2)
bodytext = ""
tags = list() # set of all tags in headline
tag_search = re.search(r"(.*?)\s*:([a-zA-Z0-9].*?):$", heading)
tag_search = re.search(r"(.*?)\s+:([a-zA-Z0-9@_].*?):\s*$", heading)
if tag_search:
heading = tag_search.group(1)
parsedtags = tag_search.group(2)
@@ -260,14 +260,6 @@ def makelist(file, filename) -> List["Orgnode"]:
# Prefix filepath/title to ancestors
n.ancestors = [file_title] + n.ancestors
# Set SOURCE property to a file+heading based org-mode link to the entry
if n.level == 0:
n.properties["LINE"] = f"file:{normalize_filename(filename)}::0"
n.properties["SOURCE"] = f"[[file:{normalize_filename(filename)}]]"
else:
escaped_heading = n.heading.replace("[", "\\[").replace("]", "\\]")
n.properties["SOURCE"] = f"[[file:{normalize_filename(filename)}::*{escaped_heading}]]"
return nodelist
@@ -520,10 +512,11 @@ class Orgnode(object):
n = n + "\n"
# Output Property Drawer
n = n + indent + ":PROPERTIES:\n"
for key, value in self._properties.items():
n = n + indent + f":{key}: {value}\n"
n = n + indent + ":END:\n"
if self._properties:
n = n + indent + ":PROPERTIES:\n"
for key, value in self._properties.items():
n = n + indent + f":{key}: {value}\n"
n = n + indent + ":END:\n"
# Output Body
if self.hasBody:

View File

@@ -147,12 +147,10 @@ body line 1.1
# Extract Entries from specified Org files
extracted_entries = OrgToEntries.extract_org_entries(org_files=data, max_tokens=12)
assert len(extracted_entries) == 2
for entry in extracted_entries[1]:
entry.raw = clean(entry.raw)
# Assert
assert len(extracted_entries[1]) == 1
assert entry.raw == expected_entry
assert extracted_entries[1][-1].raw == expected_entry
def test_parse_org_entry_with_children_as_single_entry_if_small(tmp_path):
@@ -388,8 +386,6 @@ def test_extract_entries_with_different_level_headings(tmp_path):
# Extract Entries from specified Org files
entries = OrgToEntries.extract_org_entries(org_files=data, index_heading_entries=True, max_tokens=3)
assert len(entries) == 2
for entry in entries[1]:
entry.raw = clean(f"{entry.raw}")
# Assert
assert len(entries[1]) == 2