From e5415bdaeeb59d0dfbdb0d32e274e8e0a1748660 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Thu, 8 May 2025 23:31:55 -0600 Subject: [PATCH] Only reasoning agent should terminate run, not the grounding agent. Grounding agent does not have the full context and capabilities to make this call. Only let reasoning agent make termination decision. Add a wait action instead when grounder requests termination. --- .../processor/operator/operator_agent_binary.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/khoj/processor/operator/operator_agent_binary.py b/src/khoj/processor/operator/operator_agent_binary.py index 4045f3be..6f99afb6 100644 --- a/src/khoj/processor/operator/operator_agent_binary.py +++ b/src/khoj/processor/operator/operator_agent_binary.py @@ -177,18 +177,18 @@ Focus on the visual action and provide all necessary context. # Process grounding response if grounding_response.strip().endswith("DONE"): - rendered_parts += ["Completed task."] + # Ignore DONE response by the grounding agent. Reasoning agent handles termination. + actions.append(WaitAction(duration=1.0)) + rendered_parts += ["Nothing to do."] elif grounding_response.strip().endswith("FAIL"): - rendered_parts += ["Error in grounding LLM response."] + # Ignore FAIL response by the grounding agent. Reasoning agent handles termination. + actions.append(WaitAction(duration=1.0)) + rendered_parts += ["Could not process response."] else: rendered_parts += [f"**Thought (Grounding)**: {grounding_response}"] for action in actions: rendered_parts += [f"**Action**: {action}"] - action_results += [ - { - "content": None, # Updated after environment step - } - ] + action_results += [{"content": None}] # content set after environment step except Exception as e: logger.error(f"Error calling Grounding LLM: {e}") rendered_parts += [f"**Error**: Error contacting Grounding LLM: {e}"]