Truncate long words in stdout, stderr for context efficiency

Prevent long base64 images etc. in stdout, stderr from causing context limits to be hit.
2026-03-08 05:39:13 +00:00 · 2025-08-19 23:31:45 -07:00
parent dbc3330610
commit fb0347a388
1 changed files with 8 additions and 0 deletions
--- a/src/khoj/utils/helpers.py
+++ b/src/khoj/utils/helpers.py
@@ -9,6 +9,7 @@ import logging
 import os
 import platform
 import random
 import re
 import urllib.parse
 import uuid
 from collections import OrderedDict
@@ -894,6 +895,13 @@ def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000
                    "filename": output_file["filename"],
                    "b64_data": output_file["b64_data"][:max_chars] + "...",
                }
        # Truncate long "words" in stdout, stderr. Words are runs of 1000+ non-whitespace characters (e.g. base64 blobs).
        for key in ["std_out", "std_err"]:
            if key in code_result["results"]:
                code_result["results"][key] = re.sub(
                    r"\S{1000,}", lambda m: m.group(0)[:1000] + "...", code_result["results"][key]
                )
    return code_results