Handle parsing JSON from a string with a plain-text suffix

This commit is contained in:
Debanjum
2025-05-22 20:54:28 -07:00
parent acebb90643
commit dca17591f3
2 changed files with 54 additions and 12 deletions

View File

@@ -697,8 +697,9 @@ def clean_code_python(code: str):
def load_complex_json(json_str):
"""
Preprocess a raw JSON string to escape unescaped double quotes within value strings,
while preserving the JSON structure and already escaped quotes.
Preprocess a raw JSON string to
- escape unescaped double quotes within value strings while preserving the JSON structure and already escaped quotes.
- remove suffix after the first valid JSON object,
"""
def replace_unescaped_quotes(match):
@@ -726,9 +727,20 @@ def load_complex_json(json_str):
for loads in json_loaders_to_try:
try:
return loads(processed)
except (json.JSONDecodeError, pyjson5.Json5Exception) as e:
errors.append(f"{type(e).__name__}: {str(e)}")
except (json.JSONDecodeError, pyjson5.Json5Exception) as e_load:
loader_name = loads.__name__
errors.append(f"{loader_name} (initial parse): {type(e_load).__name__}: {str(e_load)}")
# Handle plain text suffixes by slicing at error position
if hasattr(e_load, "pos") and 0 < e_load.pos < len(processed):
try:
sliced = processed[: e_load.pos].strip()
if sliced:
return loads(sliced)
except Exception as e_slice:
errors.append(
f"{loader_name} after slice at {e_load.pos}: {type(e_slice).__name__}: {str(e_slice)}"
)
# If all loaders fail, raise the aggregated error
raise ValueError(
f"Failed to load JSON with errors: {'; '.join(errors)}\n\n"

View File

@@ -175,16 +175,46 @@ class TestTruncateMessage:
assert truncated_chat_history[0] != copy_big_chat_message, "Original message should be modified"
def test_load_complex_raw_json_string():
# Arrange
raw_json = r"""{"key": "value with unescaped " and unescaped \' and escaped \" and escaped \\'"}"""
expeced_json = {"key": "value with unescaped \" and unescaped \\' and escaped \" and escaped \\'"}
class TestLoadComplexJson:
def test_load_complex_raw_json_string(self):
# Arrange
raw_json = r"""{"key": "value with unescaped " and unescaped \' and escaped \" and escaped \\'"}"""
expected_json = {"key": "value with unescaped \" and unescaped \\' and escaped \" and escaped \\'"}
# Act
parsed_json = utils.load_complex_json(raw_json)
# Act
parsed_json = utils.load_complex_json(raw_json)
# Assert
assert parsed_json == expeced_json
# Assert
assert parsed_json == expected_json
def test_load_complex_json_with_python_code(self):
# Arrange
raw_json = r"""{"python": "import os\nvalue = \"\"\"\nfirst line of "text"\nsecond line of 'text'\n\"\"\"\nprint(value)"}"""
expected_json = {
"python": 'import os\nvalue = """\nfirst line of "text"\nsecond line of \'text\'\n"""\nprint(value)'
}
# Act
parsed_json = utils.load_complex_json(raw_json)
# Assert
assert parsed_json == expected_json
def test_load_complex_json_inline(self):
# Arrange
raw_json = """
{"key1": "value1", "key2": "value2"}plain text suffix
"""
expected_json = {
"key1": "value1",
"key2": "value2",
}
# Act
parsed_json = utils.load_complex_json(raw_json)
# Assert
assert parsed_json == expected_json
def generate_content(count, suffix=""):