mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 13:23:15 +00:00
Handle parsing json from string with plain text suffix
This commit is contained in:
@@ -697,8 +697,9 @@ def clean_code_python(code: str):
|
||||
|
||||
def load_complex_json(json_str):
|
||||
"""
|
||||
Preprocess a raw JSON string to escape unescaped double quotes within value strings,
|
||||
while preserving the JSON structure and already escaped quotes.
|
||||
Preprocess a raw JSON string to
|
||||
- escape unescaped double quotes within value strings while preserving the JSON structure and already escaped quotes.
|
||||
- remove any plain text suffix that follows the first valid JSON object.
|
||||
"""
|
||||
|
||||
def replace_unescaped_quotes(match):
|
||||
@@ -726,9 +727,20 @@ def load_complex_json(json_str):
|
||||
for loads in json_loaders_to_try:
|
||||
try:
|
||||
return loads(processed)
|
||||
except (json.JSONDecodeError, pyjson5.Json5Exception) as e:
|
||||
errors.append(f"{type(e).__name__}: {str(e)}")
|
||||
except (json.JSONDecodeError, pyjson5.Json5Exception) as e_load:
|
||||
loader_name = loads.__name__
|
||||
errors.append(f"{loader_name} (initial parse): {type(e_load).__name__}: {str(e_load)}")
|
||||
|
||||
# Handle plain text suffixes by slicing at error position
|
||||
if hasattr(e_load, "pos") and 0 < e_load.pos < len(processed):
|
||||
try:
|
||||
sliced = processed[: e_load.pos].strip()
|
||||
if sliced:
|
||||
return loads(sliced)
|
||||
except Exception as e_slice:
|
||||
errors.append(
|
||||
f"{loader_name} after slice at {e_load.pos}: {type(e_slice).__name__}: {str(e_slice)}"
|
||||
)
|
||||
# If all loaders fail, raise the aggregated error
|
||||
raise ValueError(
|
||||
f"Failed to load JSON with errors: {'; '.join(errors)}\n\n"
|
||||
|
||||
Reference in New Issue
Block a user