diff --git a/src/khoj/processor/operator/grounding_agent.py b/src/khoj/processor/operator/grounding_agent.py
index d6126cbe..61c36d9d 100644
--- a/src/khoj/processor/operator/grounding_agent.py
+++ b/src/khoj/processor/operator/grounding_agent.py
@@ -185,7 +185,7 @@ class GroundingAgent:
             },
         ]
 
-    async def act(self, instruction: str, current_state: EnvState) -> AgentActResult:
+    async def act(self, instruction: str, current_state: EnvState) -> tuple[str, list[OperatorAction]]:
         """Call the grounding LLM to get the next action based on the current state and instruction."""
         # Format the message for the API call
         messages_for_api = self._format_message_for_api(instruction, current_state)
@@ -204,7 +204,7 @@ class GroundingAgent:
 
             # Parse tool calls
             grounding_message = grounding_response.choices[0].message
-            action_results = self._parse_action(grounding_message, instruction, current_state)
+            rendered_response, actions = self._parse_action(grounding_message, instruction, current_state)
 
             # Update usage by grounding model
             self.tracer["usage"] = get_chat_usage_metrics(
@@ -215,10 +215,10 @@ class GroundingAgent:
             )
         except Exception as e:
             logger.error(f"Error calling Grounding LLM: {e}")
-            rendered_response = f"**Thought (Vision)**: {instruction}\n- **Error**: Error contacting Grounding LLM: {e}"
-            action_results = AgentActResult(actions=[], action_results=[], rendered_response=rendered_response)
+            rendered_response = f"**Error**: Error contacting Grounding LLM: {e}"
+            actions = []
 
-        return action_results
+        return rendered_response, actions
 
     def _format_message_for_api(self, instruction: str, current_state: EnvState) -> List:
         """Format the message for the API call."""
@@ -264,14 +264,13 @@ back() # Use this to go back to the previous page.
 
     def _parse_action(
         self, grounding_message: ChatCompletionMessage, instruction: str, current_state: EnvState
-    ) -> AgentActResult:
+    ) -> tuple[str, list[OperatorAction]]:
         """Parse the tool calls from the grounding LLM response and convert them to action objects."""
         actions: List[OperatorAction] = []
         action_results: List[dict] = []
 
         if grounding_message.tool_calls:
-            # Start rendering with vision output
-            rendered_parts = [f"**Thought (Vision)**: {instruction}"]
+            rendered_parts = []
             for tool_call in grounding_message.tool_calls:
                 function_name = tool_call.function.name
                 try:
@@ -336,17 +335,10 @@ back() # Use this to go back to the previous page.
         else:
             # Grounding LLM responded but didn't call a tool
             logger.warning("Grounding LLM did not produce a tool call.")
-            rendered_response = f"**Thought (Vision)**: {instruction}\n- **Response (Grounding)**: {grounding_message.content or '[No tool call]'}"
+            rendered_response = f"{grounding_message.content or 'No action required.'}"
 
         # Render the response
-        return AgentActResult(
-            actions=actions,
-            action_results=action_results,
-            rendered_response={
-                "text": rendered_response,
-                "image": f"data:image/webp;base64,{current_state.screenshot}",
-            },
-        )
+        return rendered_response, actions
 
     def reset(self):
         """Reset the agent state."""