Handle parsing JSON from a string with a plain-text suffix

This commit is contained in:
Debanjum
2025-05-22 20:54:28 -07:00
parent acebb90643
commit dca17591f3
2 changed files with 54 additions and 12 deletions

View File

@@ -697,8 +697,9 @@ def clean_code_python(code: str):
def load_complex_json(json_str):
"""
Preprocess a raw JSON string to escape unescaped double quotes within value strings,
while preserving the JSON structure and already escaped quotes.
Preprocess a raw JSON string to:
- escape unescaped double quotes within value strings while preserving the JSON structure and already escaped quotes,
- remove the suffix after the first valid JSON object.
"""
def replace_unescaped_quotes(match):
@@ -726,9 +727,20 @@ def load_complex_json(json_str):
for loads in json_loaders_to_try:
try:
return loads(processed)
except (json.JSONDecodeError, pyjson5.Json5Exception) as e:
errors.append(f"{type(e).__name__}: {str(e)}")
except (json.JSONDecodeError, pyjson5.Json5Exception) as e_load:
loader_name = loads.__name__
errors.append(f"{loader_name} (initial parse): {type(e_load).__name__}: {str(e_load)}")
# Handle plain text suffixes by slicing at error position
if hasattr(e_load, "pos") and 0 < e_load.pos < len(processed):
try:
sliced = processed[: e_load.pos].strip()
if sliced:
return loads(sliced)
except Exception as e_slice:
errors.append(
f"{loader_name} after slice at {e_load.pos}: {type(e_slice).__name__}: {str(e_slice)}"
)
# If all loaders fail, raise the aggregated error
raise ValueError(
f"Failed to load JSON with errors: {'; '.join(errors)}\n\n"