mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-07 21:29:13 +00:00
Add action results for multiple actions similar to other operator agents
Adds the results of each action in a separate item in message content. Previously we were adding this as a single larger text blob. This changes adds structure to simplify post processing (e.g truncation). The updated add_action_results should also require less work to generalize if we pass tool call history to grounding model as action results in valid openai format.
This commit is contained in:
@@ -431,12 +431,11 @@ back() # Use this to go back to the previous page.
|
|||||||
|
|
||||||
if action_to_run:
|
if action_to_run:
|
||||||
actions.append(action_to_run)
|
actions.append(action_to_run)
|
||||||
# Prepare action result structure (similar to OpenAIOperatorAgent)
|
|
||||||
action_results.append(
|
action_results.append(
|
||||||
{
|
{
|
||||||
"type": "tool_result",
|
"type": "tool_result",
|
||||||
"tool_call_id": tool_call.id,
|
"tool_call_id": tool_call.id,
|
||||||
"content": None, # Updated by environment step
|
"content": None, # Updated after environment step
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
rendered_parts.append(action_render_str)
|
rendered_parts.append(action_render_str)
|
||||||
@@ -472,28 +471,30 @@ back() # Use this to go back to the previous page.
|
|||||||
if not agent_action.action_results:
|
if not agent_action.action_results:
|
||||||
return
|
return
|
||||||
|
|
||||||
tool_outputs = []
|
|
||||||
for idx, env_step in enumerate(env_steps):
|
for idx, env_step in enumerate(env_steps):
|
||||||
if idx < len(agent_action.action_results): # Ensure we don't go out of bounds
|
result_content = env_step.error or env_step.output or "[Action completed]"
|
||||||
result_content = env_step.error or env_step.output or "[Action completed]"
|
action_result = agent_action.action_results[idx]
|
||||||
tool_outputs.append(["Took screenshot" if env_step.type == "image" else json.dumps(result_content)])
|
if env_step.type == "image":
|
||||||
|
message = "**Action Result**: Took screenshot"
|
||||||
|
images = [f"data:image/png;base64,{convert_image_to_png(env_step.screenshot_base64)}"]
|
||||||
|
elif idx == len(env_steps) - 1:
|
||||||
|
message = f"**Action Result**: {json.dumps(result_content)}"
|
||||||
|
images = [f"data:image/png;base64,{convert_image_to_png(env_step.screenshot_base64)}"]
|
||||||
else:
|
else:
|
||||||
logger.warning(
|
message = f"**Action Result**: {json.dumps(result_content)}"
|
||||||
f"Mismatch between env_steps ({len(env_steps)}) and action_results ({len(agent_action.action_results)})"
|
images = []
|
||||||
)
|
action_result["content"] = construct_structured_message(
|
||||||
|
message=message,
|
||||||
# Append tool results message to history
|
images=images,
|
||||||
if tool_outputs:
|
|
||||||
tool_outputs_list = "\n".join([f"- {idx}: {str(item)}" for idx, item in enumerate(tool_outputs)])
|
|
||||||
tool_outputs_str = "**Action Results**:\n" + tool_outputs_list
|
|
||||||
formatted_screenshot = f"data:image/png;base64,{convert_image_to_png(env_step.screenshot_base64)}"
|
|
||||||
tool_output_content = construct_structured_message(
|
|
||||||
message=tool_outputs_str,
|
|
||||||
images=[formatted_screenshot],
|
|
||||||
model_type=self.reasoning_model.model_type,
|
model_type=self.reasoning_model.model_type,
|
||||||
vision_enabled=True,
|
vision_enabled=True,
|
||||||
)
|
)
|
||||||
self.messages.append(AgentMessage(role="environment", content=tool_output_content))
|
|
||||||
|
# Append action results to history
|
||||||
|
action_results_content = []
|
||||||
|
for action_result in agent_action.action_results:
|
||||||
|
action_results_content.extend(action_result["content"])
|
||||||
|
self.messages.append(AgentMessage(role="environment", content=action_results_content))
|
||||||
|
|
||||||
async def summarize(self, summarize_prompt: str, env_state: EnvState) -> str:
|
async def summarize(self, summarize_prompt: str, env_state: EnvState) -> str:
|
||||||
conversation_history = self._format_message_for_api(self.messages)
|
conversation_history = self._format_message_for_api(self.messages)
|
||||||
|
|||||||
Reference in New Issue
Block a user