From 305c25ae1ade1c334f47b3026d49fe397986eb2b Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 16 Nov 2023 00:13:39 -0800 Subject: [PATCH] Track ancestor headings for each org-mode entry in org-node parser --- src/khoj/processor/org_mode/orgnode.py | 28 +++++++- tests/test_orgnode.py | 91 ++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 3 deletions(-) diff --git a/src/khoj/processor/org_mode/orgnode.py b/src/khoj/processor/org_mode/orgnode.py index db660ee7..68508666 100644 --- a/src/khoj/processor/org_mode/orgnode.py +++ b/src/khoj/processor/org_mode/orgnode.py @@ -80,6 +80,7 @@ def makelist(file, filename): } # populated from #+SEQ_TODO line level = "" heading = "" + ancestor_headings = [f"{filename}"] bodytext = "" introtext = "" tags = list() # set of all tags in headline @@ -98,7 +99,7 @@ def makelist(file, filename): heading_search = re.search(r"^(\*+)\s(.*?)\s*$", line) if heading_search: # we are processing a heading line if heading: # if we have are on second heading, append first heading to headings list - thisNode = Orgnode(level, heading, bodytext, tags) + thisNode = Orgnode(level, heading, bodytext, tags, ancestor_headings) if closed_date: thisNode.closed = closed_date closed_date = "" @@ -114,6 +115,8 @@ def makelist(file, filename): thisNode.properties = property_map nodelist.append(thisNode) property_map = {"LINE": f"file:{normalize_filename(filename)}::{ctr}"} + previous_level = level + previous_heading = heading level = heading_search.group(1) heading = heading_search.group(2) bodytext = "" @@ -126,6 +129,17 @@ def makelist(file, filename): for parsedtag in parsedtags.split(":"): if parsedtag != "": tags.append(parsedtag) + + # Add previous heading to ancestors if current heading is deeper than previous level + if len(level) > len(previous_level) and previous_heading: + ancestor_headings.append(previous_heading) + # Remove last ancestor(s) if current heading is shallower than previous level + elif len(level) < 
len(previous_level): + for _ in range(len(level), len(previous_level)): + if not ancestor_headings or len(ancestor_headings) == 0: + break + ancestor_headings.pop() + else: # we are processing a non-heading line if line[:10] == "#+SEQ_TODO": kwlist = re.findall(r"([A-Z]+)\(", line) @@ -216,7 +230,7 @@ def makelist(file, filename): nodelist = [thisNode] + nodelist # write out last heading node if heading: - thisNode = Orgnode(level, heading, bodytext, tags) + thisNode = Orgnode(level, heading, bodytext, tags, ancestor_headings) thisNode.properties = property_map if sched_date: thisNode.scheduled = sched_date @@ -261,7 +275,7 @@ class Orgnode(object): with the headline. """ - def __init__(self, level, headline, body, tags): + def __init__(self, level, headline, body, tags, ancestor_headings=[]): """ Create an Orgnode object given the parameters of level (as the raw asterisks), headline text (including the TODO tag), and @@ -279,9 +293,17 @@ class Orgnode(object): self._closed = "" # Closed date self._properties = dict() self._logbook = list() # List of clock-in, clock-out tuples representing logbook entries + self._ancestor_headings = ancestor_headings.copy() # Look for priority in headline and transfer to prty field + @property + def ancestors(self): + """ + Return the ancestor headings of the node, ordered from outermost to innermost + """ + return self._ancestor_headings + @property def heading(self): """ diff --git a/tests/test_orgnode.py b/tests/test_orgnode.py index c6ed3447..4ef12661 100644 --- a/tests/test_orgnode.py +++ b/tests/test_orgnode.py @@ -161,6 +161,8 @@ Body Line 1""" assert len(entries) == 1 # parsed heading from entry assert entries[0].heading == "Heading[1]" + # track ancestors of entry + assert entries[0].ancestors == [f"{orgfile}"] # ensure SOURCE link has square brackets in filename, heading escaped in rendered entries escaped_orgfile = f"{orgfile}".replace("[1]", "\\[1\\]") assert f":SOURCE: [[file:{escaped_orgfile}::*Heading\\[1\\]" in f"{entries[0]}" @@ -260,6 
+262,7 @@ Body Line 1""" assert entries[0].closed == "" assert entries[0].scheduled == "" assert entries[0].deadline == "" + assert entries[0].ancestors == [] # ---------------------------------------------------------------------------------------------------- @@ -284,6 +287,7 @@ Body Line 1 assert entries[0].closed == "" assert entries[0].scheduled == "" assert entries[0].deadline == "" + assert entries[0].ancestors == [] # ---------------------------------------------------------------------------------------------------- @@ -304,8 +308,10 @@ entry body assert len(entries) == 2 assert entries[0].heading == "Title" assert entries[0].body == "intro body\n" + assert entries[0].ancestors == [] assert entries[1].heading == "Entry Heading" assert entries[1].body == "entry body\n\n" + assert entries[1].ancestors == [f"{orgfile}"] # ---------------------------------------------------------------------------------------------------- @@ -326,8 +332,93 @@ entry body assert len(entries) == 2 assert entries[0].heading == "Title1 Title2" assert entries[0].body == "intro body\n" + assert entries[0].ancestors == [] assert entries[1].heading == "Entry Heading" assert entries[1].body == "entry body\n\n" + assert entries[1].ancestors == [f"{orgfile}"] + + +# ---------------------------------------------------------------------------------------------------- +def test_parse_org_with_single_ancestor_heading(tmp_path): + "Parse org entries with parent headings context" + # Arrange + body = f""" +* Heading 1 +body 1 +** Sub Heading 1 +""" + orgfile = create_file(tmp_path, body) + + # Act + entries = orgnode.makelist_with_filepath(orgfile) + + # Assert + assert len(entries) == 2 + assert entries[0].heading == "Heading 1" + assert entries[0].ancestors == [f"{orgfile}"] + assert entries[1].heading == "Sub Heading 1" + assert entries[1].ancestors == [f"{orgfile}", "Heading 1"] + + +# ---------------------------------------------------------------------------------------------------- +def 
test_parse_org_with_multiple_ancestor_headings(tmp_path): + "Parse org entries with parent headings context" + # Arrange + body = f""" +* Heading 1 +body 1 +** Sub Heading 1 +*** Sub Sub Heading 1 +sub sub body 1 +""" + orgfile = create_file(tmp_path, body) + + # Act + entries = orgnode.makelist_with_filepath(orgfile) + + # Assert + assert len(entries) == 3 + assert entries[0].heading == "Heading 1" + assert entries[0].ancestors == [f"{orgfile}"] + assert entries[1].heading == "Sub Heading 1" + assert entries[1].ancestors == [f"{orgfile}", "Heading 1"] + assert entries[2].heading == "Sub Sub Heading 1" + assert entries[2].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 1"] + + +# ---------------------------------------------------------------------------------------------------- +def test_parse_org_with_multiple_ancestor_headings_of_siblings(tmp_path): + "Parse org entries with parent headings context" + # Arrange + body = f""" +* Heading 1 +body 1 +** Sub Heading 1 +*** Sub Sub Heading 1 +sub sub body 1 +*** Sub Sub Heading 2 +** Sub Heading 2 +*** Sub Sub Heading 3 +""" + orgfile = create_file(tmp_path, body) + + # Act + entries = orgnode.makelist_with_filepath(orgfile) + + # Assert + assert len(entries) == 6 + assert entries[0].heading == "Heading 1" + assert entries[0].ancestors == [f"{orgfile}"] + assert entries[1].heading == "Sub Heading 1" + assert entries[1].ancestors == [f"{orgfile}", "Heading 1"] + assert entries[2].heading == "Sub Sub Heading 1" + assert entries[2].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 1"] + assert entries[3].heading == "Sub Sub Heading 2" + assert entries[3].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 1"] + assert entries[4].heading == "Sub Heading 2" + assert entries[4].ancestors == [f"{orgfile}", "Heading 1"] + assert entries[5].heading == "Sub Sub Heading 3" + assert entries[5].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 2"] # Helper Functions