mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Fix stripping empty escape sequences from strings
- Fix log message on jsonl write
This commit is contained in:
@@ -88,7 +88,7 @@ def extract_beancount_transactions(beancount_files):
|
|||||||
|
|
||||||
# Initialize Regex for extracting Beancount Entries
|
# Initialize Regex for extracting Beancount Entries
|
||||||
transaction_regex = r'^\n?\d{4}-\d{2}-\d{2} [\*|\!] '
|
transaction_regex = r'^\n?\d{4}-\d{2}-\d{2} [\*|\!] '
|
||||||
empty_newline = f'^[{empty_escape_sequences}]*$'
|
empty_newline = f'^[\n\r\t\ ]*$'
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
transaction_to_file_map = []
|
transaction_to_file_map = []
|
||||||
|
|||||||
@@ -97,7 +97,8 @@ def extract_markdown_entries(markdown_files):
|
|||||||
markdown_content = f.read()
|
markdown_content = f.read()
|
||||||
markdown_entries_per_file = [f'#{entry.strip(empty_escape_sequences)}'
|
markdown_entries_per_file = [f'#{entry.strip(empty_escape_sequences)}'
|
||||||
for entry
|
for entry
|
||||||
in re.split(markdown_heading_regex, markdown_content, flags=re.MULTILINE)]
|
in re.split(markdown_heading_regex, markdown_content, flags=re.MULTILINE)
|
||||||
|
if entry.strip(empty_escape_sequences) != '']
|
||||||
entry_to_file_map += zip(markdown_entries_per_file, [markdown_file]*len(markdown_entries_per_file))
|
entry_to_file_map += zip(markdown_entries_per_file, [markdown_file]*len(markdown_entries_per_file))
|
||||||
entries.extend(markdown_entries_per_file)
|
entries.extend(markdown_entries_per_file)
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
app_root_directory = Path(__file__).parent.parent.parent
|
app_root_directory = Path(__file__).parent.parent.parent
|
||||||
web_directory = app_root_directory / 'src/interface/web/'
|
web_directory = app_root_directory / 'src/interface/web/'
|
||||||
empty_escape_sequences = r'\n|\r\t '
|
empty_escape_sequences = '\n|\r|\t| '
|
||||||
|
|
||||||
# default app config to use
|
# default app config to use
|
||||||
default_config = {
|
default_config = {
|
||||||
|
|||||||
@@ -54,4 +54,4 @@ def compress_jsonl_data(jsonl_data, output_path):
|
|||||||
with gzip.open(output_path, 'wt') as gzip_file:
|
with gzip.open(output_path, 'wt') as gzip_file:
|
||||||
gzip_file.write(jsonl_data)
|
gzip_file.write(jsonl_data)
|
||||||
|
|
||||||
logger.info(f'Wrote {len(jsonl_data)} lines to gzip compressed jsonl at {output_path}')
|
logger.info(f'Wrote jsonl data to gzip compressed jsonl at {output_path}')
|
||||||
Reference in New Issue
Block a user