Add E2B as an optional code sandbox provider

- Specify the E2B API key and template to use via environment variables
- Try to load and use the e2b library when the E2B API key is set
- Fall back to the Terrarium sandbox otherwise
- Enable more Python packages in the E2B sandbox, like rdkit, via a custom E2B template
- Use the async E2B sandbox
- Parallelize file IO with the sandbox
- Add documentation on how to enable E2B as the code sandbox instead of Terrarium
2026-03-06 05:39:12 +00:00 · 2025-02-15 03:11:15 +05:30
parent b4183c7333
commit 45fb85f1df
7 changed files with 157 additions and 18 deletions
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@@ -974,9 +974,8 @@ Khoj:
 python_code_generation_prompt = PromptTemplate.from_template(
    """
 You are Khoj, an advanced python programmer. You are tasked with constructing a python program to best answer the user query.
- The python program will run in a pyodide python sandbox with no network access.
+- The python program will run in a sandbox with no network access.
 - You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query.
- The sandbox has access to the standard library, matplotlib, panda, numpy, scipy, bs4 and sympy packages. The requests, torch, catboost, tensorflow and tkinter packages are not available.
 - List known file paths to required user documents in "input_files" and known links to required documents from the web in the "input_links" field.
 - The python program should be self-contained. It can only read data generated by the program itself and from provided input_files, input_links by their basename (i.e filename excluding file path).
 - Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
@@ -1030,6 +1029,13 @@ Code Execution Results:
 """.strip()
 )

+# Sandbox specific notes on which python packages the generated code may use.
+# Each note is written as a "- " bullet, like the other instructions in the code generation prompt.
+e2b_sandbox_context = """
+- The sandbox has access to only the standard library, matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit packages. The requests, torch, catboost, tensorflow and tkinter packages are not available.
+""".strip()
+
+terrarium_sandbox_context = """
+- The sandbox has access to the standard library, matplotlib, pandas, numpy, scipy, bs4 and sympy packages. The requests, torch, catboost, tensorflow, rdkit and tkinter packages are not available.
+""".strip()

 # Automations
 # --
--- a/src/khoj/processor/tools/run_code.py
+++ b/src/khoj/processor/tools/run_code.py
@@ -27,7 +27,12 @@ from khoj.processor.conversation.utils import (
    load_complex_json,
 )
 from khoj.routers.helpers import send_message_to_model_wrapper
-from khoj.utils.helpers import is_none_or_empty, timer, truncate_code_context
+from khoj.utils.helpers import (
+    is_e2b_code_sandbox_enabled,
+    is_none_or_empty,
+    timer,
+    truncate_code_context,
+)
 from khoj.utils.rawconfig import LocationData

 logger = logging.getLogger(__name__)
@@ -131,6 +136,12 @@ async def generate_python_code(
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

+    # add sandbox specific context like available packages
+    sandbox_context = (
+        prompts.e2b_sandbox_context if is_e2b_code_sandbox_enabled() else prompts.terrarium_sandbox_context
+    )
+    personality_context = f"{sandbox_context}\n{personality_context}"
+
    code_generation_prompt = prompts.python_code_generation_prompt.format(
        current_date=utc_date,
        query=q,
@@ -182,15 +193,104 @@ async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_ur
    Reference data i/o format based on Terrarium example client code at:
    https://github.com/cohere-ai/cohere-terrarium/blob/main/example-clients/python/terrarium_client.py
    """
-    headers = {"Content-Type": "application/json"}
    cleaned_code = clean_code_python(code)
-    data = {"code": cleaned_code, "files": input_data}
+    if is_e2b_code_sandbox_enabled():
+        try:
+            return await execute_e2b(cleaned_code, input_data)
+        except ImportError:
+            pass
+    return await execute_terrarium(cleaned_code, input_data, sandbox_url)

+
+async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
+    """Execute code and handle file I/O in an E2B sandbox.
+
+    Args:
+        code: Python source to run in the sandbox.
+        input_files: Files to upload before execution. Each entry is read for
+            its "filename" and base64 encoded "b64_data" keys.
+
+    Returns:
+        Dict with the executed "code", a "success" flag, "std_err" and
+        "output_files" (list of {"filename", "b64_data"} dicts). "std_out" is
+        only included when the code was actually run.
+
+    Raises:
+        ImportError: When the e2b_code_interpreter package is not installed.
+            execute_sandboxed_python relies on this to fall back to Terrarium.
+    """
+    # Imported outside the try block below so a missing e2b package propagates as ImportError
+    from e2b_code_interpreter import AsyncSandbox
+
+    sandbox = await AsyncSandbox.create(
+        api_key=os.getenv("E2B_API_KEY"),
+        template=os.getenv("E2B_TEMPLATE", "pmt2o0ghpang8gbiys57"),
+        timeout=120,
+        request_timeout=30,
+    )
+
+    try:
+        # Upload input files in parallel
+        upload_tasks = [
+            sandbox.files.write(path=file["filename"], data=base64.b64decode(file["b64_data"]), request_timeout=30)
+            for file in input_files
+        ]
+        await asyncio.gather(*upload_tasks)
+
+        # Note stored files before execution
+        E2bFile = NamedTuple("E2bFile", [("name", str), ("path", str)])
+        original_files = {E2bFile(f.name, f.path) for f in await sandbox.files.list("~")}
+
+        # Execute the code in the sandbox
+        execution = await sandbox.run_code(code=code, timeout=60)
+
+        # Identify new files created during execution.
+        # Keep them in a list so each download pairs with its file by position.
+        current_files = {E2bFile(f.name, f.path) for f in await sandbox.files.list("~")}
+        new_files = list(current_files - original_files)
+
+        # Read newly created files in parallel
+        download_tasks = [sandbox.files.read(f.path, request_timeout=30) for f in new_files]
+        downloaded_files = await asyncio.gather(*download_tasks)
+
+        # Collect output files
+        output_files = []
+        for f, content in zip(new_files, downloaded_files):
+            if isinstance(content, bytes):
+                # Binary files like PNG - encode as base64
+                b64_data = base64.b64encode(content).decode("utf-8")
+            elif Path(f.name).suffix in (".png", ".jpeg", ".jpg", ".svg"):
+                # Ignore image files as they are extracted from execution results below for inline display
+                continue
+            else:
+                # Text files - encode utf-8 string as base64
+                b64_data = base64.b64encode(content.encode("utf-8")).decode("utf-8")
+            output_files.append({"filename": f.name, "b64_data": b64_data})
+
+        # Collect output files from execution results.
+        # Only the first available representation of each result is kept, in the order listed.
+        for idx, result in enumerate(execution.results):
+            for result_type in ["png", "jpeg", "svg", "text", "markdown", "json"]:
+                if b64_data := getattr(result, result_type, None):
+                    output_files.append({"filename": f"{idx}.{result_type}", "b64_data": b64_data})
+                    break
+
+        # Collect logs. Any execution error or stderr output marks the run as failed
+        success = not execution.error and not execution.logs.stderr
+        stdout = "\n".join(execution.logs.stdout)
+        errors = "\n".join(execution.logs.stderr)
+        if execution.error:
+            errors = f"{execution.error}\n{errors}"
+
+        return {
+            "code": code,
+            "success": success,
+            "std_out": stdout,
+            "std_err": errors,
+            "output_files": output_files,
+        }
+    except Exception as e:
+        return {
+            "code": code,
+            "success": False,
+            "std_err": f"Sandbox failed to execute code: {str(e)}",
+            "output_files": [],
+        }
+    finally:
+        # Release the remote sandbox right away instead of holding it until its timeout expires.
+        # Cleanup is best effort: a failure here must not replace the result being returned.
+        try:
+            await sandbox.kill()
+        except Exception as e:
+            logger.warning("Failed to stop E2B sandbox: %s", e)
+
+
+async def execute_terrarium(
+    code: str,
+    input_data: list[dict],
+    sandbox_url: str,
+) -> dict[str, Any]:
+    """Execute code using Terrarium sandbox"""
+    headers = {"Content-Type": "application/json"}
+    data = {"code": code, "files": input_data}
    async with aiohttp.ClientSession() as session:
        async with session.post(sandbox_url, json=data, headers=headers, timeout=30) as response:
            if response.status == 200:
                result: dict[str, Any] = await response.json()
-                result["code"] = cleaned_code
+                result["code"] = code
                # Store decoded output files
                result["output_files"] = result.get("output_files", [])
                for output_file in result["output_files"]:
@@ -202,7 +302,7 @@ async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_ur
                return result
            else:
                return {
-                    "code": cleaned_code,
+                    "code": code,
                    "success": False,
                    "std_err": f"Failed to execute code with {response.status}",
                    "output_files": [],
--- a/src/khoj/utils/helpers.py
+++ b/src/khoj/utils/helpers.py
@@ -321,6 +321,12 @@ def get_device() -> torch.device:
        return torch.device("cpu")


+def is_e2b_code_sandbox_enabled():
+    """Report whether the E2B code sandbox should be used.
+
+    E2B counts as enabled unless is_none_or_empty flags the value of the
+    E2B_API_KEY environment variable. Set that variable to use E2B.
+    """
+    e2b_api_key = os.getenv("E2B_API_KEY")
+    return not is_none_or_empty(e2b_api_key)
+
+
 class ConversationCommand(str, Enum):
    Default = "default"
    General = "general"
@@ -362,20 +368,23 @@ command_descriptions_for_agent = {
    ConversationCommand.Code: "Agent can run Python code to parse information, run complex calculations, create documents and charts.",
 }

+# Code tool descriptions shown to the LLM. The package lists should match what the respective sandbox provides.
+e2b_tool_description = "To run Python code in a E2B sandbox with no network access. Helpful to parse complex information, run calculations, create text documents and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available."
+terrarium_tool_description = "To run Python code in a Terrarium, Pyodide sandbox with no network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4 and sympy external packages are available."
+
 tool_descriptions_for_llm = {
    ConversationCommand.Default: "To use a mix of your internal knowledge and the user's personal knowledge, or if you don't entirely understand the query.",
    ConversationCommand.General: "To use when you can answer the question without any outside information or personal knowledge",
    ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
    ConversationCommand.Online: "To search for the latest, up-to-date information from the internet. Note: **Questions about Khoj should always use this data source**",
    ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.",
-    ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse complex information, run complex calculations, create plaintext documents, and create charts with quantitative data. Only matplotlib, panda, numpy, scipy, bs4 and sympy external packages are available.",
+    ConversationCommand.Code: e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
 }

 function_calling_description_for_llm = {
    ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
    ConversationCommand.Online: "To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed.",
    ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share the webpage links and information to extract in your query.",
-    ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse complex information, run complex calculations, create plaintext documents, and create charts with quantitative data. Only matplotlib, panda, numpy, scipy, bs4 and sympy external packages are available.",
+    ConversationCommand.Code: e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
 }

 mode_descriptions_for_llm = {