mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 13:25:11 +00:00
Handle parsing json from string with plain text suffix
This commit is contained in:
@@ -697,8 +697,9 @@ def clean_code_python(code: str):
|
|||||||
|
|
||||||
def load_complex_json(json_str):
|
def load_complex_json(json_str):
|
||||||
"""
|
"""
|
||||||
Preprocess a raw JSON string to escape unescaped double quotes within value strings,
|
Preprocess a raw JSON string to
|
||||||
while preserving the JSON structure and already escaped quotes.
|
- escape unescaped double quotes within value strings while preserving the JSON structure and already escaped quotes.
|
||||||
|
- remove any plain text suffix that follows the first valid JSON object.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def replace_unescaped_quotes(match):
|
def replace_unescaped_quotes(match):
|
||||||
@@ -726,9 +727,20 @@ def load_complex_json(json_str):
|
|||||||
for loads in json_loaders_to_try:
|
for loads in json_loaders_to_try:
|
||||||
try:
|
try:
|
||||||
return loads(processed)
|
return loads(processed)
|
||||||
except (json.JSONDecodeError, pyjson5.Json5Exception) as e:
|
except (json.JSONDecodeError, pyjson5.Json5Exception) as e_load:
|
||||||
errors.append(f"{type(e).__name__}: {str(e)}")
|
loader_name = loads.__name__
|
||||||
|
errors.append(f"{loader_name} (initial parse): {type(e_load).__name__}: {str(e_load)}")
|
||||||
|
|
||||||
|
# Handle plain text suffixes by slicing at error position
|
||||||
|
if hasattr(e_load, "pos") and 0 < e_load.pos < len(processed):
|
||||||
|
try:
|
||||||
|
sliced = processed[: e_load.pos].strip()
|
||||||
|
if sliced:
|
||||||
|
return loads(sliced)
|
||||||
|
except Exception as e_slice:
|
||||||
|
errors.append(
|
||||||
|
f"{loader_name} after slice at {e_load.pos}: {type(e_slice).__name__}: {str(e_slice)}"
|
||||||
|
)
|
||||||
# If all loaders fail, raise the aggregated error
|
# If all loaders fail, raise the aggregated error
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Failed to load JSON with errors: {'; '.join(errors)}\n\n"
|
f"Failed to load JSON with errors: {'; '.join(errors)}\n\n"
|
||||||
|
|||||||
@@ -175,16 +175,46 @@ class TestTruncateMessage:
|
|||||||
assert truncated_chat_history[0] != copy_big_chat_message, "Original message should be modified"
|
assert truncated_chat_history[0] != copy_big_chat_message, "Original message should be modified"
|
||||||
|
|
||||||
|
|
||||||
def test_load_complex_raw_json_string():
|
class TestLoadComplexJson:
|
||||||
# Arrange
|
def test_load_complex_raw_json_string(self):
|
||||||
raw_json = r"""{"key": "value with unescaped " and unescaped \' and escaped \" and escaped \\'"}"""
|
# Arrange
|
||||||
expeced_json = {"key": "value with unescaped \" and unescaped \\' and escaped \" and escaped \\'"}
|
raw_json = r"""{"key": "value with unescaped " and unescaped \' and escaped \" and escaped \\'"}"""
|
||||||
|
expected_json = {"key": "value with unescaped \" and unescaped \\' and escaped \" and escaped \\'"}
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
parsed_json = utils.load_complex_json(raw_json)
|
parsed_json = utils.load_complex_json(raw_json)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert parsed_json == expeced_json
|
assert parsed_json == expected_json
|
||||||
|
|
||||||
|
def test_load_complex_json_with_python_code(self):
|
||||||
|
# Arrange
|
||||||
|
raw_json = r"""{"python": "import os\nvalue = \"\"\"\nfirst line of "text"\nsecond line of 'text'\n\"\"\"\nprint(value)"}"""
|
||||||
|
expected_json = {
|
||||||
|
"python": 'import os\nvalue = """\nfirst line of "text"\nsecond line of \'text\'\n"""\nprint(value)'
|
||||||
|
}
|
||||||
|
|
||||||
|
# Act
|
||||||
|
parsed_json = utils.load_complex_json(raw_json)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert parsed_json == expected_json
|
||||||
|
|
||||||
|
def test_load_complex_json_inline(self):
|
||||||
|
# Arrange
|
||||||
|
raw_json = """
|
||||||
|
{"key1": "value1", "key2": "value2"}plain text suffix
|
||||||
|
"""
|
||||||
|
expected_json = {
|
||||||
|
"key1": "value1",
|
||||||
|
"key2": "value2",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Act
|
||||||
|
parsed_json = utils.load_complex_json(raw_json)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert parsed_json == expected_json
|
||||||
|
|
||||||
|
|
||||||
def generate_content(count, suffix=""):
|
def generate_content(count, suffix=""):
|
||||||
|
|||||||
Reference in New Issue
Block a user