From ea4fdd913483887b6d234da978a1adbcce0fd5c0 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Sun, 21 Aug 2022 19:41:40 +0300
Subject: [PATCH] Fix logic to ignore notes with no body. Add tests to prevent regression

- Notes with empty newlines in body were not being ignored
- Add regression tests to avoid above regression in org_to_jsonl conversion
---
 src/processor/org_mode/org_to_jsonl.py |  5 ++-
 tests/test_org_to_jsonl.py             | 62 ++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_org_to_jsonl.py

diff --git a/src/processor/org_mode/org_to_jsonl.py b/src/processor/org_mode/org_to_jsonl.py
index caad7715..ea2962f5 100644
--- a/src/processor/org_mode/org_to_jsonl.py
+++ b/src/processor/org_mode/org_to_jsonl.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 
 # Standard Packages
+import re
 import json
 import argparse
 import pathlib
@@ -71,14 +72,14 @@ def extract_org_entries(org_files):
     return entries
 
 
-def convert_org_entries_to_jsonl(entries, verbose=0):
+def convert_org_entries_to_jsonl(entries, verbose=0) -> str:
     "Convert each Org-Mode entries to JSON and collate as JSONL"
     jsonl = ''
     for entry in entries:
         entry_dict = dict()
 
         # Ignore title notes i.e notes with just headings and empty body
-        if not entry.Body() or entry.Body().strip(empty_escape_sequences) == "":
+        if not entry.Body() or re.sub(r'\n|\t|\r| ', '', entry.Body()) == "":
             continue
 
         entry_dict["compiled"] = f'{entry.Heading()}.'
diff --git a/tests/test_org_to_jsonl.py b/tests/test_org_to_jsonl.py
new file mode 100644
index 00000000..cadd4a6a
--- /dev/null
+++ b/tests/test_org_to_jsonl.py
@@ -0,0 +1,62 @@
+# Standard Packages
+import json
+from posixpath import split
+
+# Internal Packages
+from src.processor.org_mode.org_to_jsonl import convert_org_entries_to_jsonl, extract_org_entries
+from src.utils.helpers import is_none_or_empty
+
+
+def test_entry_with_empty_body_line_to_jsonl(tmp_path):
+    '''Ensure entries with empty body are ignored. 
+    Property drawers not considered Body. Ignore control characters for evaluating if Body empty.'''
+    # Arrange
+    entry = f'''*** Heading
+    :PROPERTIES:
+    :ID: 42-42-42
+    :END:
+    \t\r\n
+    '''
+    orgfile = create_file(tmp_path, entry)
+
+    # Act
+    # Extract Entries from specified Org files
+    entries = extract_org_entries(org_files=[orgfile])
+
+    # Process Each Entry from All Notes Files
+    jsonl_data = convert_org_entries_to_jsonl(entries)
+
+    # Assert
+    assert is_none_or_empty(jsonl_data)
+
+
+def test_entry_with_body_to_jsonl(tmp_path):
+    "Ensure entries with valid body text are loaded."
+    # Arrange
+    entry = f'''*** Heading
+    :PROPERTIES:
+    :ID: 42-42-42
+    :END:
+    \t\r\nBody Line 1\n
+    '''
+    orgfile = create_file(tmp_path, entry)
+
+    # Act
+    # Extract Entries from specified Org files
+    entries = extract_org_entries(org_files=[orgfile])
+
+    # Process Each Entry from All Notes Files
+    jsonl_string = convert_org_entries_to_jsonl(entries)
+    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
+
+    # Assert
+    assert len(jsonl_data) == 1
+
+
+# Helper Functions
+def create_file(tmp_path, entry, filename="test.org"):
+    org_file = tmp_path / f"notes/{filename}"
+    org_file.parent.mkdir()
+    org_file.touch()
+    org_file.write_text(entry)
+    return org_file
\ No newline at end of file