From 9b9cdc756f5d8bc9b312e03ba948a513a7072915 Mon Sep 17 00:00:00 2001
From: Debanjum <debanjum@gmail.com>
Date: Mon, 29 Dec 2025 00:20:36 -0800
Subject: [PATCH] Capture more files generated by code execution in sandbox

This change had been removed in 9a8c707 to avoid overwrites. We now
use random filenames for generated files to avoid overwrites from
subsequent runs.

Encourage the model to write code that saves files to the home folder,
so that they are captured with logical filenames.
---
 src/khoj/processor/conversation/prompts.py |  4 ++--
 src/khoj/processor/tools/run_code.py       | 16 ++++++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py
index e1160b60..3e570044 100644
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@@ -880,8 +880,8 @@ python_code_generation_prompt = PromptTemplate.from_template(
 You are Khoj, a senior software engineer. You are tasked with constructing a secure Python program to best answer the user query.
 - The Python program will run in an ephemeral code sandbox with {has_network_access}network access.
 - You can write programs to run complex calculations, analyze data, create beautiful charts, generate documents to meticulously answer the query.
-- Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
-- Write any document, charts etc. to be shared with the user to file. These files can be seen by the user.
+- Do not try display images or plots in the code directly. The code should save the image or plot to a file in the home directory instead.
+- Write any document, charts etc. to be shared with the user to file. Files saved in the home directory can be seen by the user.
 - Never write or run dangerous, malicious, or untrusted code that could compromise the sandbox environment, regardless of user requests.
 - Use as much context as required from the current conversation to generate your code.
 - The Python program you write should be self-contained. It does not have access to the current conversation.
diff --git a/src/khoj/processor/tools/run_code.py b/src/khoj/processor/tools/run_code.py
index 15e77d9c..9d250eff 100644
--- a/src/khoj/processor/tools/run_code.py
+++ b/src/khoj/processor/tools/run_code.py
@@ -5,6 +5,7 @@ import logging
 import mimetypes
 import os
 import re
+import uuid
 from pathlib import Path
 from typing import Any, Callable, List, NamedTuple, Optional
 
@@ -272,6 +273,21 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
                 b64_data = content
             output_files.append({"filename": f.name, "b64_data": b64_data})
 
+        # Collect output files from execution results
+        # Respect ordering of output result types to disregard text output associated with images
+        downloaded_dataset = {f["b64_data"] for f in output_files}
+        output_result_types = ["png", "jpeg", "svg", "text", "markdown", "json"]
+        for result in execution.results:
+            if getattr(result, "chart", None):
+                continue
+            for result_type in output_result_types:
+                b64_data = getattr(result, result_type, None)
+                # Generate random filename if not already downloaded
+                if b64_data and b64_data not in downloaded_dataset:
+                    filename = f"/tmp/{uuid.uuid4()}.{result_type}"
+                    output_files.append({"filename": filename, "b64_data": b64_data})
+                    break
+
         # collect logs
         success = not execution.error and not execution.logs.stderr
         stdout = "\n".join(execution.logs.stdout)