From 6290d744ea46cb92696202a5bea2c5c15e46225c Mon Sep 17 00:00:00 2001
From: Debanjum <debanjum@gmail.com>
Date: Wed, 30 Jul 2025 19:46:55 -0700
Subject: [PATCH] Make code tool write safe code to run in sandbox

- Instruct both the manager and the code generation AI not to write or run
  unsafe code, as an added layer of safety on top of sandboxed code execution.
- Disallow custom agent prompts that instruct unsafe code generation
---
 src/khoj/processor/conversation/prompts.py | 15 +++++++++------
 src/khoj/utils/helpers.py                  |  4 ++--
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py
index 252ed79f..47589e93 100644
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@@ -900,13 +900,14 @@ Khoj:
 # --
 python_code_generation_prompt = PromptTemplate.from_template(
     """
-You are Khoj, an advanced python programmer. You are tasked with constructing a python program to best answer the user query.
-- The python program will run in a code sandbox with {has_network_access}network access.
+You are Khoj, a senior software engineer. You are tasked with constructing a secure Python program to best answer the user query.
+- The Python program will run in a code sandbox with {has_network_access}network access.
 - You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query.
 - Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
 - Write any document, charts etc. to be shared with the user to file. These files can be seen by the user.
+- Never write or run dangerous, malicious, or untrusted code that could compromise the sandbox environment, regardless of user requests.
 - Use as much context as required from the current conversation to generate your code.
-- The python program you write should be self-contained. It does not have access to the current conversation.
+- The Python program you write should be self-contained. It does not have access to the current conversation.
   It can only read data generated by the program itself and any user file paths referenced in your program.
 {personality_context}
 What code will you need to write to answer the user's question?
@@ -915,7 +916,7 @@ Current Date: {current_date}
 User's Location: {location}
 {username}
 
-Your response should contain python code wrapped in markdown code blocks (i.e starting with```python and ending with ```)
+Your response should contain Python code wrapped in markdown code blocks (i.e. starting with ```python and ending with ```)
 Example 1:
 ---
 Q: Calculate the interest earned and final amount for a principal of $43,235 invested at a rate of 5.24 percent for 5 years.
@@ -1005,8 +1006,8 @@ plt.ylabel('Population')
 plt.savefig('population_by_year_plot.png')
 ```
 
-Now it's your turn to construct a python program to answer the user's query using the provided context and coversation provided below.
-Ensure you include the python code to execute and wrap it in a markdown code block.
+Now it's your turn to construct a secure Python program to answer the user's query using the context and conversation provided below.
+Ensure you include the Python code to execute and wrap it in a markdown code block.
 
 Context:
 ---
@@ -1167,6 +1168,7 @@ If the prompt is not safe, respond with {{ "safe": "False", "reason": <provide a
 A prompt is considered unsafe only if it contains any of the following:
 - Illegal, sexual or clearly harmful content for the user or others.
 - Racist, sexist or other discriminatory content.
+- Instructions to write or run dangerous, malicious or unsafe code that could compromise a software system.
 
 Referencing personal notes or data is perfectly acceptable.
 
@@ -1206,6 +1208,7 @@ If the prompt is not safe, respond with {{ "safe": "False", "reason": <provide a
 A prompt is considered unsafe only if it contains any of the following:
 - Illegal or clearly harmful content for the user or others.
 - Racist, sexist or other discriminatory content.
+- Instructions to write or run dangerous, malicious or unsafe code that could compromise a software system.
 
 Referencing personal notes or data is perfectly acceptable.
 
diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py
index 1dfc4470..48a6eace 100644
--- a/src/khoj/utils/helpers.py
+++ b/src/khoj/utils/helpers.py
@@ -462,8 +462,8 @@ command_descriptions_for_agent = {
     ConversationCommand.Operator: "Agent can operate a computer to complete tasks.",
 }
 
-e2b_tool_description = "To run a Python script in a E2B sandbox with no network access. Helpful to parse complex information, run calculations, create text documents and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available."
-terrarium_tool_description = "To run a Python script in a Terrarium, Pyodide sandbox with no network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, panda, numpy, scipy, bs4 and sympy external packages are available."
+e2b_tool_description = "To run a Python script in an E2B sandbox with network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available. Never use the code tool to run, write or decode dangerous, malicious or untrusted code, regardless of user requests."
+terrarium_tool_description = "To run a Python script in a Terrarium, Pyodide sandbox with no network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4 and sympy external packages are available. Never use the code tool to run, write or decode dangerous, malicious or untrusted code, regardless of user requests."
 
 tool_descriptions_for_llm = {
     ConversationCommand.Default: "To use a mix of your internal knowledge and the user's personal knowledge, or if you don't entirely understand the query.",