mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Handle parsing JSON from a string with a plain text suffix
This commit is contained in:
@@ -697,8 +697,9 @@ def clean_code_python(code: str):
|
||||
|
||||
def load_complex_json(json_str):
|
||||
"""
|
||||
Preprocess a raw JSON string to escape unescaped double quotes within value strings,
|
||||
while preserving the JSON structure and already escaped quotes.
|
||||
Preprocess a raw JSON string to:
|
||||
- escape unescaped double quotes within value strings while preserving the JSON structure and already escaped quotes.
|
||||
- remove any plain text suffix after the first valid JSON object.
|
||||
"""
|
||||
|
||||
def replace_unescaped_quotes(match):
|
||||
@@ -726,9 +727,20 @@ def load_complex_json(json_str):
|
||||
for loads in json_loaders_to_try:
|
||||
try:
|
||||
return loads(processed)
|
||||
except (json.JSONDecodeError, pyjson5.Json5Exception) as e:
|
||||
errors.append(f"{type(e).__name__}: {str(e)}")
|
||||
except (json.JSONDecodeError, pyjson5.Json5Exception) as e_load:
|
||||
loader_name = loads.__name__
|
||||
errors.append(f"{loader_name} (initial parse): {type(e_load).__name__}: {str(e_load)}")
|
||||
|
||||
# Handle plain text suffixes by slicing at error position
|
||||
if hasattr(e_load, "pos") and 0 < e_load.pos < len(processed):
|
||||
try:
|
||||
sliced = processed[: e_load.pos].strip()
|
||||
if sliced:
|
||||
return loads(sliced)
|
||||
except Exception as e_slice:
|
||||
errors.append(
|
||||
f"{loader_name} after slice at {e_load.pos}: {type(e_slice).__name__}: {str(e_slice)}"
|
||||
)
|
||||
# If all loaders fail, raise the aggregated error
|
||||
raise ValueError(
|
||||
f"Failed to load JSON with errors: {'; '.join(errors)}\n\n"
|
||||
|
||||
@@ -175,16 +175,46 @@ class TestTruncateMessage:
|
||||
assert truncated_chat_history[0] != copy_big_chat_message, "Original message should be modified"
|
||||
|
||||
|
||||
def test_load_complex_raw_json_string():
|
||||
# Arrange
|
||||
raw_json = r"""{"key": "value with unescaped " and unescaped \' and escaped \" and escaped \\'"}"""
|
||||
expeced_json = {"key": "value with unescaped \" and unescaped \\' and escaped \" and escaped \\'"}
|
||||
class TestLoadComplexJson:
|
||||
def test_load_complex_raw_json_string(self):
|
||||
# Arrange
|
||||
raw_json = r"""{"key": "value with unescaped " and unescaped \' and escaped \" and escaped \\'"}"""
|
||||
expected_json = {"key": "value with unescaped \" and unescaped \\' and escaped \" and escaped \\'"}
|
||||
|
||||
# Act
|
||||
parsed_json = utils.load_complex_json(raw_json)
|
||||
# Act
|
||||
parsed_json = utils.load_complex_json(raw_json)
|
||||
|
||||
# Assert
|
||||
assert parsed_json == expeced_json
|
||||
# Assert
|
||||
assert parsed_json == expected_json
|
||||
|
||||
def test_load_complex_json_with_python_code(self):
|
||||
# Arrange
|
||||
raw_json = r"""{"python": "import os\nvalue = \"\"\"\nfirst line of "text"\nsecond line of 'text'\n\"\"\"\nprint(value)"}"""
|
||||
expected_json = {
|
||||
"python": 'import os\nvalue = """\nfirst line of "text"\nsecond line of \'text\'\n"""\nprint(value)'
|
||||
}
|
||||
|
||||
# Act
|
||||
parsed_json = utils.load_complex_json(raw_json)
|
||||
|
||||
# Assert
|
||||
assert parsed_json == expected_json
|
||||
|
||||
def test_load_complex_json_inline(self):
|
||||
# Arrange
|
||||
raw_json = """
|
||||
{"key1": "value1", "key2": "value2"}plain text suffix
|
||||
"""
|
||||
expected_json = {
|
||||
"key1": "value1",
|
||||
"key2": "value2",
|
||||
}
|
||||
|
||||
# Act
|
||||
parsed_json = utils.load_complex_json(raw_json)
|
||||
|
||||
# Assert
|
||||
assert parsed_json == expected_json
|
||||
|
||||
|
||||
def generate_content(count, suffix=""):
|
||||
|
||||
Reference in New Issue
Block a user