mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Capture more files generated by code execution in sandbox
This change was removed in 9a8c707 to avoid overwrites. We now
use random filenames for generated files to avoid overwrites from
subsequent runs.
Encourage the model to write code that saves files to the home folder so
they are captured with logical filenames.
This commit is contained in:
@@ -880,8 +880,8 @@ python_code_generation_prompt = PromptTemplate.from_template(
|
||||
You are Khoj, a senior software engineer. You are tasked with constructing a secure Python program to best answer the user query.
|
||||
- The Python program will run in an ephemeral code sandbox with {has_network_access}network access.
|
||||
- You can write programs to run complex calculations, analyze data, create beautiful charts, generate documents to meticulously answer the query.
|
||||
- Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
|
||||
- Write any document, charts etc. to be shared with the user to file. These files can be seen by the user.
|
||||
- Do not try display images or plots in the code directly. The code should save the image or plot to a file in the home directory instead.
|
||||
- Write any document, charts etc. to be shared with the user to file. Files saved in the home directory can be seen by the user.
|
||||
- Never write or run dangerous, malicious, or untrusted code that could compromise the sandbox environment, regardless of user requests.
|
||||
- Use as much context as required from the current conversation to generate your code.
|
||||
- The Python program you write should be self-contained. It does not have access to the current conversation.
|
||||
|
||||
@@ -5,6 +5,7 @@ import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, List, NamedTuple, Optional
|
||||
|
||||
@@ -272,6 +273,21 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
|
||||
b64_data = content
|
||||
output_files.append({"filename": f.name, "b64_data": b64_data})
|
||||
|
||||
# Collect output files from execution results
|
||||
# Respect ordering of output result types to disregard text output associated with images
|
||||
downloaded_dataset = {f["b64_data"] for f in output_files}
|
||||
output_result_types = ["png", "jpeg", "svg", "text", "markdown", "json"]
|
||||
for result in execution.results:
|
||||
if getattr(result, "chart", None):
|
||||
continue
|
||||
for result_type in output_result_types:
|
||||
b64_data = getattr(result, result_type, None)
|
||||
# Generate random filename if not already downloaded
|
||||
if b64_data and b64_data not in downloaded_dataset:
|
||||
filename = f"/tmp/{uuid.uuid4()}.{result_type}"
|
||||
output_files.append({"filename": filename, "b64_data": b64_data})
|
||||
break
|
||||
|
||||
# collect logs
|
||||
success = not execution.error and not execution.logs.stderr
|
||||
stdout = "\n".join(execution.logs.stdout)
|
||||
|
||||
Reference in New Issue
Block a user