Generate compiled form of each entry directly in the org-mode processor

- The logic for compiling an org-mode entry (for later encoding) now
  completely resides in the org-to-jsonl processor layer

- This allows asymmetric search to be generic and not be aware of
  org-mode-specific properties that were extracted by the org-to-jsonl
  processor layer

- Now asymmetric search just expects the jsonl to (at least) have the
  'compiled' and 'raw' keys for each entry. What original text the
  entry was compiled from is irrelevant to it. The original text
  could be mail, chat, markdown, org-mode, etc.; asymmetric search
  doesn't have to care.
This commit is contained in:
Debanjum Singh Solanky
2022-07-21 01:47:51 +04:00
parent 4ead79d272
commit 06cf425314
2 changed files with 12 additions and 18 deletions

View File

@@ -10,6 +10,7 @@ import gzip
# Internal Packages
from src.processor.org_mode import orgnode
from src.utils.helpers import get_absolute_path, is_none_or_empty
from src.utils.constants import empty_escape_sequences
# Define Functions
@@ -105,33 +106,37 @@ def convert_org_entries_to_jsonl(entries, verbose=0):
for entry in entries:
entry_dict = dict()
entry_dict["Title"] = entry.Heading()
# Ignore title notes i.e notes with just headings and empty body
if not entry.Body() or entry.Body().strip(empty_escape_sequences) == "":
continue
entry_dict["compiled"] = f'{entry.Heading()}.'
if verbose > 1:
print(f"Title: {entry.Heading()}")
if entry.Tags():
tags_str = " ".join(entry.Tags())
entry_dict["Tags"] = tags_str
entry_dict["compiled"] += f'\t {tags_str}.'
if verbose > 2:
print(f"Tags: {tags_str}")
if entry.Closed():
entry_dict["Closed"] = entry.Closed().strftime("%Y-%m-%d")
entry_dict["compiled"] += f'\n Closed on {entry.Closed().strftime("%Y-%m-%d")}.'
if verbose > 2:
print(f'Closed: {entry.Closed().strftime("%Y-%m-%d")}')
if entry.Scheduled():
entry_dict["Scheduled"] = entry.Scheduled().strftime("%Y-%m-%d")
entry_dict["compiled"] += f'\n Scheduled for {entry.Scheduled().strftime("%Y-%m-%d")}.'
if verbose > 2:
print(f'Scheduled: {entry.Scheduled().strftime("%Y-%m-%d")}')
if entry.Body():
entry_dict["Body"] = entry.Body()
entry_dict["compiled"] += f'\n {entry.Body()}'
if verbose > 2:
print(f"Body: {entry.Body()}")
if entry_dict:
entry_dict["Raw"] = f'{entry}'
entry_dict["raw"] = f'{entry}'
# Convert Dictionary to JSON and Append to JSONL string
jsonl += f'{json.dumps(entry_dict, ensure_ascii=False)}\n'

View File

@@ -55,18 +55,7 @@ def extract_entries(notesfile, verbose=0):
# Read File
for line in jsonl_file:
note = json.loads(line.strip(empty_escape_sequences))
# Ignore title notes i.e notes with just headings and empty body
if not "Body" in note or note["Body"].strip(empty_escape_sequences) == "":
continue
scheduled_str = f'\t Scheduled for {note["Scheduled"]}' if "Scheduled" in note else ""
closed_str = f'\t Closed on {note["Closed"]}' if "Closed" in note else ""
tags_str = f'\t {note["Tags"]}' if "Tags" in note else ""
body_str = f'\n {note["Body"]}' if "Body" in note else ""
note_string = f'{note["Title"]}{tags_str}{closed_str}{scheduled_str}{body_str}'
entries.append({'compiled': note_string, 'raw': note["Raw"]})
entries.append({'compiled': note['compiled'], 'raw': note["raw"]})
# Close File
jsonl_file.close()