From 804473320103df69df24601cd40d6953860f00ee Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 9 Oct 2024 13:37:06 -0700 Subject: [PATCH] Give Khoj ability to run python code as a tool triggered via chat API Create python code executing chat actor - The chat actor generate python code within sandbox constraints - Run the generated python code in the cohere terrarium, pyodide based sandbox accessible at sandbox url --- .../conversation/anthropic/anthropic_chat.py | 5 + .../conversation/google/gemini_chat.py | 5 + .../conversation/offline/chat_model.py | 7 +- src/khoj/processor/conversation/openai/gpt.py | 5 + src/khoj/processor/conversation/prompts.py | 42 +++++++ src/khoj/processor/conversation/utils.py | 2 + src/khoj/routers/api_chat.py | 28 ++++- src/khoj/routers/helpers.py | 103 ++++++++++++++++++ src/khoj/utils/helpers.py | 7 +- 9 files changed, 200 insertions(+), 4 deletions(-) diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py index cb51abb4..c980c00d 100644 --- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py +++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py @@ -126,6 +126,7 @@ def converse_anthropic( references, user_query, online_results: Optional[Dict[str, Dict]] = None, + code_results: Optional[Dict[str, Dict]] = None, conversation_log={}, model: Optional[str] = "claude-instant-1.2", api_key: Optional[str] = None, @@ -175,6 +176,10 @@ def converse_anthropic( completion_func(chat_response=prompts.no_online_results_found.format()) return iter([prompts.no_online_results_found.format()]) + if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results): + conversation_primer = ( + f"{prompts.code_executed_context.format(code_results=str(code_results))}\n{conversation_primer}" + ) if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: 
conversation_primer = ( f"{prompts.online_search_conversation.format(online_results=str(online_results))}\n{conversation_primer}" diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index 7359b3eb..5735799e 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -122,6 +122,7 @@ def converse_gemini( references, user_query, online_results: Optional[Dict[str, Dict]] = None, + code_results: Optional[Dict[str, Dict]] = None, conversation_log={}, model: Optional[str] = "gemini-1.5-flash", api_key: Optional[str] = None, @@ -173,6 +174,10 @@ def converse_gemini( completion_func(chat_response=prompts.no_online_results_found.format()) return iter([prompts.no_online_results_found.format()]) + if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results): + conversation_primer = ( + f"{prompts.code_executed_context.format(code_results=str(code_results))}\n{conversation_primer}" + ) if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: conversation_primer = ( f"{prompts.online_search_conversation.format(online_results=str(online_results))}\n{conversation_primer}" diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py index 4eafae00..d9d99f21 100644 --- a/src/khoj/processor/conversation/offline/chat_model.py +++ b/src/khoj/processor/conversation/offline/chat_model.py @@ -135,7 +135,8 @@ def filter_questions(questions: List[str]): def converse_offline( user_query, references=[], - online_results=[], + online_results={}, + code_results={}, conversation_log={}, model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", loaded_model: Union[Any, None] = None, @@ -187,6 +188,10 @@ def converse_offline( completion_func(chat_response=prompts.no_online_results_found.format()) return 
iter([prompts.no_online_results_found.format()]) + if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results): + conversation_primer = ( + f"{prompts.code_executed_context.format(code_results=str(code_results))}\n{conversation_primer}" + ) if ConversationCommand.Online in conversation_commands: simplified_online_results = online_results.copy() for result in online_results: diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py index ad02b10e..a4850cfd 100644 --- a/src/khoj/processor/conversation/openai/gpt.py +++ b/src/khoj/processor/conversation/openai/gpt.py @@ -123,6 +123,7 @@ def converse( references, user_query, online_results: Optional[Dict[str, Dict]] = None, + code_results: Optional[Dict[str, Dict]] = None, conversation_log={}, model: str = "gpt-4o-mini", api_key: Optional[str] = None, @@ -176,6 +177,10 @@ def converse( completion_func(chat_response=prompts.no_online_results_found.format()) return iter([prompts.no_online_results_found.format()]) + if not is_none_or_empty(code_results): + conversation_primer = ( + f"{prompts.code_executed_context.format(code_results=str(code_results))}\n{conversation_primer}" + ) if not is_none_or_empty(online_results): conversation_primer = ( f"{prompts.online_search_conversation.format(online_results=str(online_results))}\n{conversation_primer}" diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index 0738af4e..23788cab 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -730,6 +730,48 @@ Khoj: """.strip() ) +# Code Generation +# -- +python_code_generation_prompt = PromptTemplate.from_template( + """ +You are Khoj, an advanced python programmer. You are tasked with constructing **up to three** python programs to best answer the user query. +- The python program will run in a pyodide python sandbox with no network access. 
+- You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query +- The sandbox has access to the standard library, matplotlib, pandas, numpy, scipy, bs4, sympy, brotli, cryptography, fastparquet +- Do not try to display images or plots in the code directly. The code should save the image or plot to a file instead. +- Write any document, charts etc. to be shared with the user to file. These files can be seen by the user. +- Use as much context from the previous questions and answers as required to generate your code. +{personality_context} +What code will you need to write, if any, to answer the user's question? +Provide code programs as a list of strings in a JSON object with key "codes". +Current Date: {current_date} +User's Location: {location} +{username} + +The JSON schema is of the form {{"codes": ["code1", "code2", "code3"]}} +For example: +{{"codes": ["print('Hello, World!')", "print('Goodbye, World!')"]}} + +Now it's your turn to construct python programs to answer the user's question. Provide them as a list of strings in a JSON object. Do not say anything else. +History: +{chat_history} + +User: {query} +Khoj: +""".strip() +) + +code_executed_context = PromptTemplate.from_template( + """ +Use the provided code executions to inform your response. +Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the provided code execution results or past conversations. 
+ +Code Execution Results: +{code_results} +""".strip() +) + + # Automations # -- crontime_prompt = PromptTemplate.from_template( diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index e841c484..9a2ba230 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -104,6 +104,7 @@ def save_to_conversation_log( user_message_time: str = None, compiled_references: List[Dict[str, Any]] = [], online_results: Dict[str, Any] = {}, + code_results: Dict[str, Any] = {}, inferred_queries: List[str] = [], intent_type: str = "remember", client_application: ClientApplication = None, @@ -123,6 +124,7 @@ def save_to_conversation_log( "context": compiled_references, "intent": {"inferred-queries": inferred_queries, "type": intent_type}, "onlineContext": online_results, + "codeContext": code_results, "automationId": automation_id, }, conversation_log=meta_log.get("chat", []), diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index b2689a7f..cdf16bd9 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -3,7 +3,6 @@ import base64 import json import logging import time -import warnings from datetime import datetime from functools import partial from typing import Any, Dict, List, Optional @@ -47,6 +46,7 @@ from khoj.routers.helpers import ( is_query_empty, is_ready_to_chat, read_chat_stream, + run_code, update_telemetry_state, validate_conversation_config, ) @@ -950,6 +950,30 @@ async def chat( exc_info=True, ) + ## Gather Code Results + if ConversationCommand.Code in conversation_commands: + try: + async for result in run_code( + defiltered_query, + meta_log, + location, + user, + partial(send_event, ChatEvent.STATUS), + uploaded_image_url=uploaded_image_url, + agent=agent, + ): + if isinstance(result, dict) and ChatEvent.STATUS in result: + yield result[ChatEvent.STATUS] + else: + code_results = result + async for result in 
send_event(ChatEvent.STATUS, f"**Ran code snippets**: {len(code_results)}"): + yield result + except ValueError as e: + logger.warning( + f"Failed to use code tool: {e}. Attempting to respond without code results", + exc_info=True, + ) + ## Send Gathered References async for result in send_event( ChatEvent.REFERENCES, @@ -957,6 +981,7 @@ async def chat( "inferredQueries": inferred_queries, "context": compiled_references, "onlineContext": online_results, + "codeContext": code_results, }, ): yield result @@ -1024,6 +1049,7 @@ async def chat( conversation, compiled_references, online_results, + code_results, inferred_queries, conversation_commands, user, diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index e4ebdb51..f17fe1f5 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -24,6 +24,7 @@ from typing import ( ) from urllib.parse import parse_qs, quote, urljoin, urlparse +import aiohttp import cron_descriptor import pytz import requests @@ -519,6 +520,103 @@ async def generate_online_subqueries( return [q] +async def run_code( + query: str, + conversation_history: dict, + location_data: LocationData, + user: KhojUser, + send_status_func: Optional[Callable] = None, + uploaded_image_url: str = None, + agent: Agent = None, + sandbox_url: str = "http://localhost:8080", +): + # Generate Code + if send_status_func: + async for event in send_status_func(f"**Generate code snippets** for {query}"): + yield {ChatEvent.STATUS: event} + try: + with timer("Chat actor: Generate programs to execute", logger): + codes = await generate_python_code( + query, conversation_history, location_data, user, uploaded_image_url, agent + ) + except Exception as e: + raise ValueError(f"Failed to generate code for {query} with error: {e}") + + # Run Code + if send_status_func: + async for event in send_status_func(f"**Running {len(codes)} code snippets**"): + yield {ChatEvent.STATUS: event} + try: + tasks = [execute_sandboxed_python(code, 
sandbox_url) for code in codes] + with timer("Chat actor: Execute generated programs", logger): + results = await asyncio.gather(*tasks) + for result in results: + code = result.pop("code") + logger.info(f"Executed Code:\n--@@--\n{code}\n--@@--Result:\n--@@--\n{result}\n--@@--") + yield {query: {"code": code, "results": result}} + except Exception as e: + raise ValueError(f"Failed to run code for {query} with error: {e}") + + +async def generate_python_code( + q: str, + conversation_history: dict, + location_data: LocationData, + user: KhojUser, + uploaded_image_url: str = None, + agent: Agent = None, +) -> List[str]: + location = f"{location_data}" if location_data else "Unknown" + username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else "" + chat_history = construct_chat_history(conversation_history) + + utc_date = datetime.utcnow().strftime("%Y-%m-%d") + personality_context = ( + prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else "" + ) + + code_generation_prompt = prompts.python_code_generation_prompt.format( + current_date=utc_date, + query=q, + chat_history=chat_history, + location=location, + username=username, + personality_context=personality_context, + ) + + response = await send_message_to_model_wrapper( + code_generation_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object", user=user + ) + + # Validate that the response is a non-empty, JSON-serializable list + response = response.strip() + response = remove_json_codeblock(response) + response = json.loads(response) + codes = [code.strip() for code in response["codes"] if code.strip()] + + if not isinstance(codes, list) or not codes or len(codes) == 0: + raise ValueError + return codes + + +async def execute_sandboxed_python(code: str, sandbox_url: str = "http://localhost:8080") -> dict[str, Any]: + """ + Takes code to run as a string and calls the terrarium API to execute it. 
+ Returns the result of the code execution as a dictionary. + """ + headers = {"Content-Type": "application/json"} + data = {"code": code} + + async with aiohttp.ClientSession() as session: + async with session.post(sandbox_url, json=data, headers=headers) as response: + if response.status == 200: + result: dict[str, Any] = await response.json() + result["code"] = code + return result + else: + return {"code": code, "success": False, "std_err": f"Failed to execute code with {response.status}"} + + async def schedule_query(q: str, conversation_history: dict, uploaded_image_url: str = None) -> Tuple[str, ...]: """ Schedule the date, time to run the query. Assume the server timezone is UTC. @@ -949,6 +1047,7 @@ def generate_chat_response( conversation: Conversation, compiled_references: List[Dict] = [], online_results: Dict[str, Dict] = {}, + code_results: Dict[str, Dict] = {}, inferred_queries: List[str] = [], conversation_commands: List[ConversationCommand] = [ConversationCommand.Default], user: KhojUser = None, @@ -976,6 +1075,7 @@ def generate_chat_response( meta_log=meta_log, compiled_references=compiled_references, online_results=online_results, + code_results=code_results, inferred_queries=inferred_queries, client_application=client_application, conversation_id=conversation_id, @@ -1017,6 +1117,7 @@ def generate_chat_response( query_to_run, image_url=uploaded_image_url, online_results=online_results, + code_results=code_results, conversation_log=meta_log, model=chat_model, api_key=api_key, @@ -1037,6 +1138,7 @@ def generate_chat_response( compiled_references, query_to_run, online_results, + code_results, meta_log, model=conversation_config.chat_model, api_key=api_key, @@ -1054,6 +1156,7 @@ def generate_chat_response( compiled_references, query_to_run, online_results, + code_results, meta_log, model=conversation_config.chat_model, api_key=api_key, diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 9ed8ffa2..0e0193a9 100644 --- 
a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -309,6 +309,7 @@ class ConversationCommand(str, Enum): Help = "help" Online = "online" Webpage = "webpage" + Code = "code" Image = "image" Text = "text" Automation = "automation" @@ -322,6 +323,7 @@ command_descriptions = { ConversationCommand.Default: "The default command when no command specified. It intelligently auto-switches between general and notes mode.", ConversationCommand.Online: "Search for information on the internet.", ConversationCommand.Webpage: "Get information from webpage suggested by you.", + ConversationCommand.Code: "Run Python code to parse information, run complex calculations, create documents and charts.", ConversationCommand.Image: "Generate images by describing your imagination in words.", ConversationCommand.Automation: "Automatically run your query at a specified time or interval.", ConversationCommand.Help: "Get help with how to use or setup Khoj from the documentation", @@ -342,6 +344,7 @@ tool_descriptions_for_llm = { ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.", ConversationCommand.Online: "To search for the latest, up-to-date information from the internet. Note: **Questions about Khoj should always use this data source**", ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.", + ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create documents and charts for user. Matplotlib, bs4, pandas, numpy, etc. 
are available.", ConversationCommand.Summarize: "To retrieve an answer that depends on the entire document or a large text.", } @@ -352,13 +355,13 @@ function_calling_description_for_llm = { } mode_descriptions_for_llm = { - ConversationCommand.Image: "Use this if the user is requesting you to generate a picture based on their description.", + ConversationCommand.Image: "Use this if the user is requesting you to generate images based on their description. This does not support generating charts or graphs.", ConversationCommand.Automation: "Use this if the user is requesting a response at a scheduled date or time.", ConversationCommand.Text: "Use this if the other response modes don't seem to fit the query.", } mode_descriptions_for_agent = { - ConversationCommand.Image: "Agent can generate image in response.", + ConversationCommand.Image: "Agent can generate images in response. It cannot use this to generate charts and graphs.", ConversationCommand.Text: "Agent can generate text in response.", }