diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 21a95a29..efd3c51d 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -5,6 +5,7 @@ import math import mimetypes import os import queue +import re import uuid from dataclasses import dataclass from datetime import datetime @@ -538,6 +539,46 @@ def clean_code_python(code: str): return code.strip().removeprefix("```python").removesuffix("```") +def load_complex_json(json_str): + """ + Preprocess a raw JSON string to escape unescaped double quotes within value strings, + while preserving the JSON structure and already escaped quotes. + """ + + def replace_unescaped_quotes(match): + # Get the content between colons and commas/end braces + content = match.group(1) + # Replace unescaped double, single quotes that aren't already escaped + # Uses negative lookbehind to avoid replacing already escaped quotes + # Replace " with \" + processed_dq = re.sub(r'(?