Reuse logic to extract entries across symmetric, asymmetric search

Now that the logic to compile entries is in the processor layer, the
extract_entries method is standard across (text) search_types

Extract the load_jsonl method into a shared utility helper.
Use it in both the symmetric and asymmetric search types.
This commit is contained in:
Debanjum Singh Solanky
2022-07-21 02:53:18 +04:00
parent e220ecc00b
commit 5aad297286
4 changed files with 39 additions and 56 deletions

View File

@@ -1,7 +1,12 @@
# Standard Packages
import json
import gzip
import pathlib
from os.path import join
# Internal Packages
from src.utils.constants import empty_escape_sequences
def is_none_or_empty(item):
    """Return True when *item* is None or an empty, sized iterable.

    Args:
        item: Any object.

    Returns:
        True if ``item`` is None, or if it is iterable, reports a length
        and that length is zero. False otherwise, including for iterables
        that have no length (e.g. generators), whose emptiness cannot be
        checked without consuming them.
    """
    # Identity check: `== None` would dispatch to a custom __eq__, which can
    # return a non-boolean (array-like objects) or a misleading True.
    if item is None:
        return True
    # Guard on __len__ so unsized iterables do not raise TypeError in len()
    return hasattr(item, '__iter__') and hasattr(item, '__len__') and len(item) == 0
@@ -52,4 +57,30 @@ def load_model(model_name, model_dir, model_type):
if model_path is not None:
model.save(model_path)
return model
return model
def load_jsonl(input_path, verbose=0):
    """Read a list of JSON objects from a JSON lines file.

    Args:
        input_path: Path to the file to read. A ``pathlib.Path`` or a plain
            string. A ``.gz`` suffix is read as gzip-compressed text; a
            ``.jsonl`` suffix is read as plain text. Both are decoded as UTF-8.
        verbose: When greater than 0, print the number of records loaded.

    Returns:
        A list with one decoded JSON value per non-blank line of the file.

    Raises:
        ValueError: If the file suffix is neither ``.gz`` nor ``.jsonl``.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Accept plain strings as well as Path objects
    path = pathlib.Path(input_path)

    # Pick how to open the file based on its suffix
    if path.suffix == ".gz":
        opener = gzip.open
    elif path.suffix == ".jsonl":
        opener = open
    else:
        # Fail with a clear message instead of iterating over a None handle
        raise ValueError(f'Unsupported file type for {input_path}. Expected a .jsonl or .gz file')

    # Read JSONL file. The context manager closes it even if parsing fails
    data = []
    with opener(get_absolute_path(path), 'rt', encoding='utf-8') as jsonl_file:
        for line in jsonl_file:
            # NOTE(review): empty_escape_sequences is defined outside this block;
            # presumably the whitespace/escape characters to trim - confirm
            entry = line.strip(empty_escape_sequences)
            # Skip blank lines, which json.loads would reject
            if not entry:
                continue
            data.append(json.loads(entry))

    # Log JSONL entries loaded
    if verbose > 0:
        print(f'Loaded {len(data)} records from {input_path}')

    return data