mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-08 05:39:13 +00:00
Truncate long words in stdout, stderr for context efficiency
Prevent long base64 images, etc. in stdout and stderr from causing context limits to be hit.
This commit is contained in:
@@ -9,6 +9,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import random
|
import random
|
||||||
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
import uuid
|
import uuid
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
@@ -894,6 +895,13 @@ def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000
|
|||||||
"filename": output_file["filename"],
|
"filename": output_file["filename"],
|
||||||
"b64_data": output_file["b64_data"][:max_chars] + "...",
|
"b64_data": output_file["b64_data"][:max_chars] + "...",
|
||||||
}
|
}
|
||||||
|
# Truncate long "words" in stdout, stderr. A word is any run of 1000+ non-whitespace characters; it is cut to its first 1000 characters plus "...".
|
||||||
|
for key in ["std_out", "std_err"]:
|
||||||
|
if key in code_result["results"]:
|
||||||
|
code_result["results"][key] = re.sub(
|
||||||
|
r"\S{1000,}", lambda m: m.group(0)[:1000] + "...", code_result["results"][key]
|
||||||
|
)
|
||||||
|
|
||||||
return code_results
|
return code_results
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user