mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 05:39:12 +00:00
Improve Extraction of Beancount Entries
- Only extract entries starting with YYYY-MM-DD from Beancount - Strip Trailing Escape Sequences from Entries
This commit is contained in:
@@ -6,6 +6,7 @@ import argparse
|
|||||||
import pathlib
|
import pathlib
|
||||||
import glob
|
import glob
|
||||||
import gzip
|
import gzip
|
||||||
|
import re
|
||||||
|
|
||||||
# Internal Packages
|
# Internal Packages
|
||||||
from src.processor.org_mode import orgnode
|
from src.processor.org_mode import orgnode
|
||||||
@@ -110,11 +111,19 @@ def get_beancount_files(beancount_files=None, beancount_file_filter=None, verbos
|
|||||||
|
|
||||||
def extract_beancount_entries(beancount_files):
    """Extract dated entries from the specified Beancount files.

    Each file is read in full and split into chunks at every line that is
    empty or holds only whitespace. Only chunks that begin with a
    YYYY-MM-DD date are kept; anything else is dropped.

    Args:
        beancount_files: iterable of paths to the Beancount files to read.

    Returns:
        A flat list of entry strings, in file order, with surrounding
        newlines, carriage returns, tabs and spaces removed.
    """
    # An entry starts with a YYYY-MM-DD date. The optional leading newline is
    # allowed because splitting at a blank line leaves the blank line's own
    # line break attached to the front of the chunk that follows it.
    date_regex = re.compile(r'^\n?\d{4}-\d{2}-\d{2}')
    # A line containing nothing but whitespace separates two entries.
    empty_newline = re.compile(r'^[\n\r\t ]*$', flags=re.MULTILINE)

    entries = []
    for beancount_file in beancount_files:
        with open(beancount_file) as f:
            ledger_content = f.read()

        # str.strip() takes a set of characters, not a regex alternation, so
        # list only the whitespace characters: including '|' here would also
        # eat pipe characters at either end of an entry.
        entries.extend(
            chunk.strip('\n\r\t ')
            for chunk in empty_newline.split(ledger_content)
            if date_regex.match(chunk))

    return entries
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user