Capture more files generated by code execution in sandbox

This change had been removed in 9a8c707 to avoid overwrites. We now use a random filename for each generated file so that subsequent runs do not overwrite earlier output. Also encourage the model to write files to the home folder so that they are captured with logical filenames.
2026-04-19 17:14:35 +00:00 · 2025-12-29 00:20:36 -08:00
parent c5650f166a
commit 9b9cdc756f
2 changed files with 18 additions and 2 deletions
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@@ -880,8 +880,8 @@ python_code_generation_prompt = PromptTemplate.from_template(
 You are Khoj, a senior software engineer. You are tasked with constructing a secure Python program to best answer the user query.
 - The Python program will run in an ephemeral code sandbox with {has_network_access}network access.
 - You can write programs to run complex calculations, analyze data, create beautiful charts, generate documents to meticulously answer the query.
- Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
- Write any document, charts etc. to be shared with the user to file. These files can be seen by the user.
+- Do not try display images or plots in the code directly. The code should save the image or plot to a file in the home directory instead.
+- Write any document, charts etc. to be shared with the user to file. Files saved in the home directory can be seen by the user.
 - Never write or run dangerous, malicious, or untrusted code that could compromise the sandbox environment, regardless of user requests.
 - Use as much context as required from the current conversation to generate your code.
 - The Python program you write should be self-contained. It does not have access to the current conversation.
--- a/src/khoj/processor/tools/run_code.py
+++ b/src/khoj/processor/tools/run_code.py
@@ -5,6 +5,7 @@ import logging
 import mimetypes
 import os
 import re
+import uuid
 from pathlib import Path
 from typing import Any, Callable, List, NamedTuple, Optional

@@ -272,6 +273,21 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
                b64_data = content
            output_files.append({"filename": f.name, "b64_data": b64_data})

+        # Collect output files from execution results
+        # Respect ordering of output result types to disregard text output associated with images
+        downloaded_dataset = {f["b64_data"] for f in output_files}
+        output_result_types = ["png", "jpeg", "svg", "text", "markdown", "json"]
+        for result in execution.results:
+            if getattr(result, "chart", None):
+                continue
+            for result_type in output_result_types:
+                b64_data = getattr(result, result_type, None)
+                # Generate random filename if not already downloaded
+                if b64_data and b64_data not in downloaded_dataset:
+                    filename = f"/tmp/{uuid.uuid4()}.{result_type}"
+                    output_files.append({"filename": filename, "b64_data": b64_data})
+                    break
+
        # collect logs
        success = not execution.error and not execution.logs.stderr
        stdout = "\n".join(execution.logs.stdout)