mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-08 05:39:13 +00:00
Handle reasoning messages returned by the OpenAI CUA model
Documentation about this is currently limited and confusing, but it seems that a reasoning item should be kept if a computer_call follows it and dropped otherwise. Add a noop placeholder for the reasoning item to prevent termination of the operator run when a response contains only reasoning.
This commit is contained in:
@@ -164,7 +164,16 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|||||||
rendered_response["text"] = response.output_text
|
rendered_response["text"] = response.output_text
|
||||||
elif block.type == "reasoning":
|
elif block.type == "reasoning":
|
||||||
actions.append(NoopAction())
|
actions.append(NoopAction())
|
||||||
action_results.append(block)
|
# Add placeholder action result for reasoning
|
||||||
|
# This is to prevent run termination.
|
||||||
|
# It will be removed later by add_action_results func
|
||||||
|
action_results.append(
|
||||||
|
{
|
||||||
|
"type": block.type,
|
||||||
|
"id": block.id,
|
||||||
|
"summary": [],
|
||||||
|
}
|
||||||
|
)
|
||||||
if action_to_run or content:
|
if action_to_run or content:
|
||||||
actions.append(action_to_run)
|
actions.append(action_to_run)
|
||||||
if action_to_run or content:
|
if action_to_run or content:
|
||||||
@@ -190,6 +199,7 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|||||||
return
|
return
|
||||||
|
|
||||||
# Update action results with results of applying suggested actions on the environment
|
# Update action results with results of applying suggested actions on the environment
|
||||||
|
items_to_pop = []
|
||||||
for idx, env_step in enumerate(env_steps):
|
for idx, env_step in enumerate(env_steps):
|
||||||
action_result = agent_action.action_results[idx]
|
action_result = agent_action.action_results[idx]
|
||||||
result_content = env_step.error or env_step.output or "[Action completed]"
|
result_content = env_step.error or env_step.output or "[Action completed]"
|
||||||
@@ -207,10 +217,16 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|||||||
"image_url": f"data:image/webp;base64,{env_step.screenshot_base64}",
|
"image_url": f"data:image/webp;base64,{env_step.screenshot_base64}",
|
||||||
"current_url": env_step.current_url,
|
"current_url": env_step.current_url,
|
||||||
}
|
}
|
||||||
|
elif action_result["type"] == "reasoning":
|
||||||
|
items_to_pop.append(idx) # Mark placeholder reasoning action result for removal
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
# Add text data
|
# Add text data
|
||||||
action_result["output"] = result_content
|
action_result["output"] = result_content
|
||||||
|
|
||||||
|
for idx in reversed(items_to_pop):
|
||||||
|
agent_action.action_results.pop(idx)
|
||||||
|
|
||||||
self.messages += [AgentMessage(role="environment", content=agent_action.action_results)]
|
self.messages += [AgentMessage(role="environment", content=agent_action.action_results)]
|
||||||
|
|
||||||
def _format_message_for_api(self, messages: list[AgentMessage]) -> list:
|
def _format_message_for_api(self, messages: list[AgentMessage]) -> list:
|
||||||
@@ -219,6 +235,21 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|||||||
for message in messages:
|
for message in messages:
|
||||||
if message.role == "environment":
|
if message.role == "environment":
|
||||||
if isinstance(message.content, list):
|
if isinstance(message.content, list):
|
||||||
|
# Remove reasoning message if not followed by computer call
|
||||||
|
if (
|
||||||
|
len(message.content) > 1
|
||||||
|
and all(hasattr(item, "type") for item in message.content)
|
||||||
|
and message.content[0].type == "reasoning"
|
||||||
|
and message.content[1].type != "computer_call"
|
||||||
|
) or (
|
||||||
|
len(message.content) == 1
|
||||||
|
and all(hasattr(item, "type") for item in message.content)
|
||||||
|
and message.content[0].type == "reasoning"
|
||||||
|
):
|
||||||
|
logger.warning(
|
||||||
|
f"Removing reasoning message not followed by a computer call action: {message.content}"
|
||||||
|
)
|
||||||
|
message.content.pop(0)
|
||||||
formatted_messages.extend(message.content)
|
formatted_messages.extend(message.content)
|
||||||
else:
|
else:
|
||||||
logger.warning(f"Expected message content list from environment, got {type(message.content)}")
|
logger.warning(f"Expected message content list from environment, got {type(message.content)}")
|
||||||
@@ -242,7 +273,7 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|||||||
# Handle case where response_content is a dictionary and not ResponseOutputItem
|
# Handle case where response_content is a dictionary and not ResponseOutputItem
|
||||||
# This is the case when response_content contains action results
|
# This is the case when response_content contains action results
|
||||||
if not hasattr(response_content[0], "type"):
|
if not hasattr(response_content[0], "type"):
|
||||||
return "**Action**: " + json.dumps(response_content[0]["output"])
|
return "**Action**: " + json.dumps(response_content[0].get("output", "Noop"))
|
||||||
|
|
||||||
compiled_response = [""]
|
compiled_response = [""]
|
||||||
for block in deepcopy(response_content):
|
for block in deepcopy(response_content):
|
||||||
|
|||||||
Reference in New Issue
Block a user