From e5415bdaeeb59d0dfbdb0d32e274e8e0a1748660 Mon Sep 17 00:00:00 2001
From: Debanjum <debanjum@gmail.com>
Date: Thu, 8 May 2025 23:31:55 -0600
Subject: [PATCH] Only reasoning agent should terminate run, not the grounding
 agent.

The grounding agent does not have the full context or capabilities to
make this call. Only let the reasoning agent make the termination
decision.

Add a wait action instead when the grounding agent requests termination
(i.e. its response ends with DONE or FAIL).
---
 .../processor/operator/operator_agent_binary.py    | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/khoj/processor/operator/operator_agent_binary.py b/src/khoj/processor/operator/operator_agent_binary.py
index 4045f3be..6f99afb6 100644
--- a/src/khoj/processor/operator/operator_agent_binary.py
+++ b/src/khoj/processor/operator/operator_agent_binary.py
@@ -177,18 +177,18 @@ Focus on the visual action and provide all necessary context.
 
             # Process grounding response
             if grounding_response.strip().endswith("DONE"):
-                rendered_parts += ["Completed task."]
+                # Ignore DONE response by the grounding agent. Reasoning agent handles termination.
+                actions.append(WaitAction(duration=1.0))
+                rendered_parts += ["Nothing to do."]
             elif grounding_response.strip().endswith("FAIL"):
-                rendered_parts += ["Error in grounding LLM response."]
+                # Ignore FAIL response by the grounding agent. Reasoning agent handles termination.
+                actions.append(WaitAction(duration=1.0))
+                rendered_parts += ["Could not process response."]
             else:
                 rendered_parts += [f"**Thought (Grounding)**: {grounding_response}"]
                 for action in actions:
                     rendered_parts += [f"**Action**: {action}"]
-                    action_results += [
-                        {
-                            "content": None,  # Updated after environment step
-                        }
-                    ]
+            action_results += [{"content": None}]  # content set after environment step
         except Exception as e:
             logger.error(f"Error calling Grounding LLM: {e}")
             rendered_parts += [f"**Error**: Error contacting Grounding LLM: {e}"]