From 06cf4253145b86d499d48d03cf1aafd7deca5cc4 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 21 Jul 2022 01:47:51 +0400 Subject: [PATCH] Generate compiled form of each entry directly in the org-mode processor - The logic for compiling an org-mode entry (for later encoding) now completely resides in the org-to-jsonl processor layer - This allows asymmetric search to be generic and not be aware of org-mode-specific properties that were extracted by the org-to-jsonl processor layer - Now asymmetric search just expects the jsonl to (at least) have the 'compiled' and 'raw' keys for each entry. What original text the entry was compiled from is irrelevant to it. The original text could be mail, chat, markdown, org-mode, etc.; asymmetric search doesn't need to care --- src/processor/org_mode/org_to_jsonl.py | 17 +++++++++++------ src/search_type/asymmetric.py | 13 +------------ 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/src/processor/org_mode/org_to_jsonl.py b/src/processor/org_mode/org_to_jsonl.py index a7e734cc..871125b1 100644 --- a/src/processor/org_mode/org_to_jsonl.py +++ b/src/processor/org_mode/org_to_jsonl.py @@ -10,6 +10,7 @@ import gzip # Internal Packages from src.processor.org_mode import orgnode from src.utils.helpers import get_absolute_path, is_none_or_empty +from src.utils.constants import empty_escape_sequences # Define Functions @@ -105,33 +106,37 @@ def convert_org_entries_to_jsonl(entries, verbose=0): for entry in entries: entry_dict = dict() - entry_dict["Title"] = entry.Heading() + # Ignore title notes i.e notes with just headings and empty body + if not entry.Body() or entry.Body().strip(empty_escape_sequences) == "": + continue + + entry_dict["compiled"] = f'{entry.Heading()}.' if verbose > 1: print(f"Title: {entry.Heading()}") if entry.Tags(): tags_str = " ".join(entry.Tags()) - entry_dict["Tags"] = tags_str + entry_dict["compiled"] += f'\t {tags_str}.' 
if verbose > 2: print(f"Tags: {tags_str}") if entry.Closed(): - entry_dict["Closed"] = entry.Closed().strftime("%Y-%m-%d") + entry_dict["compiled"] += f'\n Closed on {entry.Closed().strftime("%Y-%m-%d")}.' if verbose > 2: print(f'Closed: {entry.Closed().strftime("%Y-%m-%d")}') if entry.Scheduled(): - entry_dict["Scheduled"] = entry.Scheduled().strftime("%Y-%m-%d") + entry_dict["compiled"] += f'\n Scheduled for {entry.Scheduled().strftime("%Y-%m-%d")}.' if verbose > 2: print(f'Scheduled: {entry.Scheduled().strftime("%Y-%m-%d")}') if entry.Body(): - entry_dict["Body"] = entry.Body() + entry_dict["compiled"] += f'\n {entry.Body()}' if verbose > 2: print(f"Body: {entry.Body()}") if entry_dict: - entry_dict["Raw"] = f'{entry}' + entry_dict["raw"] = f'{entry}' # Convert Dictionary to JSON and Append to JSONL string jsonl += f'{json.dumps(entry_dict, ensure_ascii=False)}\n' diff --git a/src/search_type/asymmetric.py b/src/search_type/asymmetric.py index 4b28f1dc..8e6e9db4 100644 --- a/src/search_type/asymmetric.py +++ b/src/search_type/asymmetric.py @@ -55,18 +55,7 @@ def extract_entries(notesfile, verbose=0): # Read File for line in jsonl_file: note = json.loads(line.strip(empty_escape_sequences)) - - # Ignore title notes i.e notes with just headings and empty body - if not "Body" in note or note["Body"].strip(empty_escape_sequences) == "": - continue - - scheduled_str = f'\t Scheduled for {note["Scheduled"]}' if "Scheduled" in note else "" - closed_str = f'\t Closed on {note["Closed"]}' if "Closed" in note else "" - tags_str = f'\t {note["Tags"]}' if "Tags" in note else "" - body_str = f'\n {note["Body"]}' if "Body" in note else "" - - note_string = f'{note["Title"]}{tags_str}{closed_str}{scheduled_str}{body_str}' - entries.append({'compiled': note_string, 'raw': note["Raw"]}) + entries.append({'compiled': note['compiled'], 'raw': note["raw"]}) # Close File jsonl_file.close()