diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index 988629cc..383bfe71 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -1121,7 +1121,7 @@ terrarium_sandbox_context = """ operator_execution_context = PromptTemplate.from_template( """ -Use the provided context from operating a browser to inform your response. +Use the results of operating a web browser to inform your response. Browser Operation Results: {operator_results} diff --git a/src/khoj/processor/operator/grounding_agent_uitars.py b/src/khoj/processor/operator/grounding_agent_uitars.py index 1646cd5a..21a3109d 100644 --- a/src/khoj/processor/operator/grounding_agent_uitars.py +++ b/src/khoj/processor/operator/grounding_agent_uitars.py @@ -38,6 +38,7 @@ class GroundingAgentUitars: UITARS_USR_PROMPT_THOUGHT = """ You are a GUI agent. You are given a task and a screenshot of the web browser tab you operate. You need to perform the next action to complete the task. You control a single tab in a Chromium browser. You cannot access the OS, filesystem, the application window or the addressbar. + Try to fulfill the user instruction to the best of your ability, especially when the instruction is given multiple times. Do not ignore the instruction. ## Output Format ``` diff --git a/src/khoj/processor/operator/operator_agent_binary.py b/src/khoj/processor/operator/operator_agent_binary.py index c506070e..4010b84e 100644 --- a/src/khoj/processor/operator/operator_agent_binary.py +++ b/src/khoj/processor/operator/operator_agent_binary.py @@ -90,16 +90,17 @@ class BinaryOperatorAgent(OperatorAgent): """ reasoning_system_prompt = f""" # Introduction -* You are Khoj, a smart web browsing assistant. You help the user accomplish their task using a web browser. +* You are Khoj, a smart and resourceful web browsing assistant. You help the user accomplish their task using a web browser. 
* You are given the user's query and screenshots of the browser's state transitions. * The current date is {datetime.today().strftime('%A, %B %-d, %Y')}. * The current URL is {current_state.url}. # Your Task * First look at the screenshots carefully to notice all pertinent information. -* Then instruct a tool AI to perform the single most important next action to progress towards the user's goal. +* Then instruct a tool AI to perform the next action that will help you progress towards the user's goal. * Make sure you scroll down to see everything before deciding something isn't available. * Perform web searches using DuckDuckGo. Don't use Google even if requested as the query will fail. +* Use your creativity to find alternate ways to make progress if you get stuck at any point. # Tool AI Capabilities * The tool AI only has access to the current screenshot and your instructions. It uses your instructions to perform the next action on the page.