mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-06 05:39:12 +00:00
Reuse logic to extract entries across symmetric, asymmetric search
Now that the logic to compile entries is in the processor layer, the extract_entries method is standard across (text) search_types. Extract the load_jsonl method as a utility helper method and use it in the symmetric and asymmetric search types.
This commit is contained in:
@@ -9,7 +9,6 @@ import gzip
|
||||
import re
|
||||
|
||||
# Internal Packages
|
||||
from src.processor.org_mode import orgnode
|
||||
from src.utils.helpers import get_absolute_path, is_none_or_empty
|
||||
from src.utils.constants import empty_escape_sequences
|
||||
|
||||
@@ -58,32 +57,6 @@ def compress_jsonl_data(jsonl_data, output_path, verbose=0):
|
||||
print(f'Wrote {jsonl_entries} lines to gzip compressed jsonl at {output_path}')
|
||||
|
||||
|
||||
def load_jsonl(input_path, verbose=0):
    """Read list of JSON objects from a JSON lines file.

    Args:
        input_path: Path-like object exposing a ``suffix`` attribute.
            A ".gz" suffix is read as gzip compressed text, a ".jsonl"
            suffix as plain text. Both are decoded as UTF-8.
        verbose: When greater than 0, print the number of records loaded.

    Returns:
        List holding one parsed JSON value per non-blank line of the file.

    Raises:
        ValueError: If the suffix of input_path is neither ".gz" nor ".jsonl".
    """
    # Pick the opener matching the file type
    if input_path.suffix == ".gz":
        opener, mode = gzip.open, 'rt'
    elif input_path.suffix == ".jsonl":
        opener, mode = open, 'r'
    else:
        # Fail with a clear message instead of iterating over a None file handle
        raise ValueError(
            f'Unsupported file type {input_path.suffix!r} for {input_path}. '
            'Expected ".jsonl" or ".gz"')

    # Read JSONL file. The context manager closes the file even if parsing fails
    data = []
    with opener(get_absolute_path(input_path), mode, encoding='utf-8') as jsonl_file:
        for line in jsonl_file:
            entry = line.strip(empty_escape_sequences)
            # Skip blank lines (e.g. a trailing newline); json.loads rejects empty input
            if entry:
                data.append(json.loads(entry))

    # Log JSONL entries loaded
    if verbose > 0:
        print(f'Loaded {len(data)} records from {input_path}')

    return data
|
||||
|
||||
|
||||
def get_beancount_files(beancount_files=None, beancount_file_filter=None, verbose=0):
|
||||
"Get Beancount files to process"
|
||||
absolute_beancount_files, filtered_beancount_files = set(), set()
|
||||
|
||||
Reference in New Issue
Block a user