Improve Extraction of Beancount Entries

- Only extract entries starting with YYYY-MM-DD from Beancount
- Strip leading and trailing newlines, carriage returns, tabs, spaces (and `|` characters) from entries
This commit is contained in:
Debanjum Singh Solanky
2022-02-26 17:36:30 -05:00
parent b3ac2dd730
commit b68558651b

View File

@@ -6,6 +6,7 @@ import argparse
import pathlib import pathlib
import glob import glob
import gzip import gzip
import re
# Internal Packages # Internal Packages
from src.processor.org_mode import orgnode from src.processor.org_mode import orgnode
@@ -110,11 +111,19 @@ def get_beancount_files(beancount_files=None, beancount_file_filter=None, verbos
def extract_beancount_entries(beancount_files): def extract_beancount_entries(beancount_files):
"Extract entries from specified Beancount files" "Extract entries from specified Beancount files"
# Initialize Regex for extracting Beancount Entries
date_regex = r'^\n?\d{4}-\d{2}-\d{2}'
empty_newline = r'^[\n\r\t ]*$'
entries = [] entries = []
for beancount_file in beancount_files: for beancount_file in beancount_files:
with open(beancount_file) as f: with open(beancount_file) as f:
entries.extend( ledger_content = f.read()
f.read().split('\n\n')) entries.extend([entry.strip('\n|\r|\t| ')
for entry
in re.split(empty_newline, ledger_content, flags=re.MULTILINE)
if re.match(date_regex, entry)])
return entries return entries