mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Allow visual grounder of binary operator agent to see past actions
Previously, the grounding agent was reset on every call, so it only saw the most recent instruction and screenshot when making its next action suggestion. This change allows the visual grounders to see past instructions and actions, which prevents looping and encourages more exploratory action suggestions when they get stuck or see errors.
This commit is contained in:
@@ -144,6 +144,8 @@ async def operate_browser(
|
||||
finally:
|
||||
if environment and not user_input_message: # Don't close browser if user input required
|
||||
await environment.close()
|
||||
if operator_agent:
|
||||
operator_agent.reset()
|
||||
|
||||
yield {
|
||||
"text": user_input_message or response,
|
||||
|
||||
@@ -168,7 +168,6 @@ Focus on the visual action and provide all necessary context.
|
||||
actions: List[OperatorAction] = []
|
||||
action_results: List[dict] = []
|
||||
rendered_parts = [f"**Thought (Vision)**: {action_instruction}"]
|
||||
self.grounding_agent.reset() # Reset grounding agent state
|
||||
|
||||
try:
|
||||
grounding_response, actions = await self.grounding_agent.act(action_instruction, current_state)
|
||||
@@ -318,3 +317,4 @@ Focus on the visual action and provide all necessary context.
|
||||
def reset(self):
|
||||
"""Reset the agent state."""
|
||||
super().reset()
|
||||
self.grounding_agent.reset() # Reset grounding agent state
|
||||
|
||||
Reference in New Issue
Block a user