From 7395af3c3a7bd1eed6a43749951084e52e7ca450 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Thu, 8 May 2025 20:27:09 -0600 Subject: [PATCH] Allow visual grounder of binary operator agent to see past actions Previously the grounding agent was reset on every call, so it only saw the most recent instruction and screenshot when making its next action suggestion. This change allows the visual grounders to see past instructions and actions, to prevent looping and to encourage more exploratory action suggestions when they are stuck or see errors. --- src/khoj/processor/operator/operate_browser.py | 2 ++ src/khoj/processor/operator/operator_agent_binary.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/khoj/processor/operator/operate_browser.py b/src/khoj/processor/operator/operate_browser.py index e5a56bfc..871f1288 100644 --- a/src/khoj/processor/operator/operate_browser.py +++ b/src/khoj/processor/operator/operate_browser.py @@ -144,6 +144,8 @@ async def operate_browser( finally: if environment and not user_input_message: # Don't close browser if user input required await environment.close() + if operator_agent: + operator_agent.reset() yield { "text": user_input_message or response, diff --git a/src/khoj/processor/operator/operator_agent_binary.py b/src/khoj/processor/operator/operator_agent_binary.py index c0d39726..6573f906 100644 --- a/src/khoj/processor/operator/operator_agent_binary.py +++ b/src/khoj/processor/operator/operator_agent_binary.py @@ -168,7 +168,6 @@ Focus on the visual action and provide all necessary context. actions: List[OperatorAction] = [] action_results: List[dict] = [] rendered_parts = [f"**Thought (Vision)**: {action_instruction}"] - self.grounding_agent.reset() # Reset grounding agent state try: grounding_response, actions = await self.grounding_agent.act(action_instruction, current_state) @@ -318,3 +317,4 @@ Focus on the visual action and provide all necessary context. 
def reset(self): """Reset the agent state.""" super().reset() + self.grounding_agent.reset() # Reset grounding agent state