diff --git a/src/khoj/processor/content/org_mode/orgnode.py b/src/khoj/processor/content/org_mode/orgnode.py index f81c6e1b..e190e17a 100644 --- a/src/khoj/processor/content/org_mode/orgnode.py +++ b/src/khoj/processor/content/org_mode/orgnode.py @@ -66,7 +66,7 @@ def makelist(file, filename) -> List["Orgnode"]: ctr = 0 if type(file) == str: - f = file.split("\n") + f = file.splitlines() else: f = file @@ -121,7 +121,7 @@ def makelist(file, filename) -> List["Orgnode"]: heading = heading_search.group(2) bodytext = "" tags = list() # set of all tags in headline - tag_search = re.search(r"(.*?)\s*:([a-zA-Z0-9].*?):$", heading) + tag_search = re.search(r"(.*?)\s+:([a-zA-Z0-9@_].*?):\s*$", heading) if tag_search: heading = tag_search.group(1) parsedtags = tag_search.group(2) @@ -260,14 +260,6 @@ def makelist(file, filename) -> List["Orgnode"]: # Prefix filepath/title to ancestors n.ancestors = [file_title] + n.ancestors - # Set SOURCE property to a file+heading based org-mode link to the entry - if n.level == 0: - n.properties["LINE"] = f"file:{normalize_filename(filename)}::0" - n.properties["SOURCE"] = f"[[file:{normalize_filename(filename)}]]" - else: - escaped_heading = n.heading.replace("[", "\\[").replace("]", "\\]") - n.properties["SOURCE"] = f"[[file:{normalize_filename(filename)}::*{escaped_heading}]]" - return nodelist @@ -520,10 +512,11 @@ class Orgnode(object): n = n + "\n" # Output Property Drawer - n = n + indent + ":PROPERTIES:\n" - for key, value in self._properties.items(): - n = n + indent + f":{key}: {value}\n" - n = n + indent + ":END:\n" + if self._properties: + n = n + indent + ":PROPERTIES:\n" + for key, value in self._properties.items(): + n = n + indent + f":{key}: {value}\n" + n = n + indent + ":END:\n" # Output Body if self.hasBody: diff --git a/tests/test_org_to_entries.py b/tests/test_org_to_entries.py index a84fe6e8..5c11a6fd 100644 --- a/tests/test_org_to_entries.py +++ b/tests/test_org_to_entries.py @@ -147,12 +147,10 @@ body line 1.1 # Extract Entries from specified Org files extracted_entries = OrgToEntries.extract_org_entries(org_files=data, max_tokens=12) assert len(extracted_entries) == 2 - for entry in extracted_entries[1]: - entry.raw = clean(entry.raw) # Assert assert len(extracted_entries[1]) == 1 - assert entry.raw == expected_entry + assert extracted_entries[1][-1].raw == expected_entry def test_parse_org_entry_with_children_as_single_entry_if_small(tmp_path): @@ -388,8 +386,6 @@ def test_extract_entries_with_different_level_headings(tmp_path): # Extract Entries from specified Org files entries = OrgToEntries.extract_org_entries(org_files=data, index_heading_entries=True, max_tokens=3) assert len(entries) == 2 - for entry in entries[1]: - entry.raw = clean(f"{entry.raw}") # Assert assert len(entries[1]) == 2