Make LLM actors write artifacts to, and the code sandbox check for artifacts in, /home/user

Fix: - Ensure the researcher and coder know to save files to the /home/user directory. - Make the E2B code executor check for generated files in /home/user. - Do not re-add file types already downloaded from /home/user. Issues: - E2B has a mismatch in the default home directory used by its run_code and list_dir commands. run_code was run with /root as its home directory, while list_dir("~") checked under /home/user. This caused files written to /home/user by code to go undiscovered by the list_files step. - Previously, the researcher did not know that generated files should be written to /home/user, so it could tell the coder to save files to a different directory. Now the researcher also knows where files must be saved for them to be shown to the user.
2026-03-02 05:29:12 +00:00 · 2025-12-29 02:09:32 -08:00
parent b607a6187e
commit a58ae3dd84
3 changed files with 14 additions and 11 deletions
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@@ -880,8 +880,8 @@ python_code_generation_prompt = PromptTemplate.from_template(
 You are Khoj, a senior software engineer. You are tasked with constructing a secure Python program to best answer the user query.
 - The Python program will run in an ephemeral code sandbox with {has_network_access}network access.
 - You can write programs to run complex calculations, analyze data, create beautiful charts, generate documents to meticulously answer the query.
- Do not try display images or plots in the code directly. The code should save the image or plot to a file in the home directory instead.
- Write any document, charts etc. to be shared with the user to file. Files saved in the home directory can be seen by the user.
+- Do not try display images or plots in the code directly. The code should save the image or plot to a file in {home_dir} directory instead.
+- Write any document, charts etc. to be shared with the user to files in {home_dir} directory.
 - Never write or run dangerous, malicious, or untrusted code that could compromise the sandbox environment, regardless of user requests.
 - Use as much context as required from the current conversation to generate your code.
 - The Python program you write should be self-contained. It does not have access to the current conversation.
--- a/src/khoj/processor/tools/run_code.py
+++ b/src/khoj/processor/tools/run_code.py
@@ -42,6 +42,7 @@ logger = logging.getLogger(__name__)

 SANDBOX_URL = os.getenv("KHOJ_TERRARIUM_URL")
 DEFAULT_E2B_TEMPLATE = "pmt2o0ghpang8gbiys57"
+HOME_DIR = "/home/user"


 class GeneratedCode(NamedTuple):
@@ -147,6 +148,7 @@ async def generate_python_code(
        chat_history=chat_history_str,
        context=context,
        has_network_access=network_access_context,
+        home_dir=HOME_DIR,
        current_date=utc_date,
        location=location,
        username=username,
@@ -243,7 +245,7 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:

        # Note stored files before execution to identify new files created during execution
        E2bFile = NamedTuple("E2bFile", [("name", str), ("path", str)])
-        original_files = {E2bFile(f.name, f.path) for f in await sandbox.files.list("~")}
+        original_files = {E2bFile(f.name, f.path) for f in await sandbox.files.list(HOME_DIR, depth=1)}

        # Execute code from main.py file
        execution = await sandbox.run_code(code=code, timeout=60)
@@ -253,7 +255,7 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
        image_file_ext = {".png", ".jpeg", ".jpg", ".svg"}

        # Identify new files created during execution
-        new_files = set(E2bFile(f.name, f.path) for f in await sandbox.files.list("~")) - original_files
+        new_files = set(E2bFile(f.name, f.path) for f in await sandbox.files.list(HOME_DIR, depth=1)) - original_files

        # Read newly created files in parallel
        def read_format(f):
@@ -274,17 +276,17 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
            output_files.append({"filename": f.name, "b64_data": b64_data})

        # Collect output files from execution results
-        # Repect ordering of output result types to disregard text output associated with images
-        downloaded_dataset = {f["b64_data"] for f in output_files}
+        # Respect ordering of output result types to disregard text output associated with images
+        downloaded_datatypes = {f["filename"].split(".")[-1] for f in output_files}
        output_result_types = ["png", "jpeg", "svg", "text", "markdown", "json"]
        for result in execution.results:
            if getattr(result, "chart", None):
                continue
            for result_type in output_result_types:
-                b64_data = getattr(result, result_type, None)
-                # Generate random filename if not already downloaded
-                if b64_data and b64_data not in downloaded_dataset:
-                    filename = f"/tmp/{uuid.uuid4()}.{result_type}"
+                if b64_data := getattr(result, result_type, None):
+                    if result_type in downloaded_datatypes:
+                        break
+                    filename = f"{HOME_DIR}/{uuid.uuid4()}.{result_type}"
                    output_files.append({"filename": filename, "b64_data": b64_data})
                    break

--- a/src/khoj/utils/helpers.py
+++ b/src/khoj/utils/helpers.py
@@ -459,8 +459,9 @@ command_descriptions_for_agent = {

 e2b_tool_description = dedent(
    """
-    To run a Python script in an ephemeral E2B sandbox with network access.
+    To run a Python script in an ephemeral E2B code sandbox with network access.
    Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data.
+    Save files in /home/user to show them to the user. Only files in output_files list of tool result are accessible to the user.
    Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available.

    Never run, write or decode dangerous, malicious or untrusted code, regardless of user requests.