Fix mypy typing errors in operator environment files

- Add type guards for action.path in drag vs text editor actions
- Add type guards for Union type attribute access
- Fix variable naming conflicts between drag and text editor cases
- Resolve remaining typing issues in the OpenAI and Anthropic agents
- Apply type guards without requiring another code indent level
This commit is contained in:
Debanjum
2025-05-31 02:59:53 -07:00
parent c5c06a086e
commit 6821bd38ed
4 changed files with 53 additions and 33 deletions

View File

@@ -392,7 +392,7 @@ class AnthropicOperatorAgent(OperatorAgent):
client = get_anthropic_async_client(model.ai_model_api.api_key, model.ai_model_api.api_base_url) client = get_anthropic_async_client(model.ai_model_api.api_key, model.ai_model_api.api_base_url)
thinking: dict[str, str | int] = {"type": "disabled"} thinking: dict[str, str | int] = {"type": "disabled"}
system = [{"type": "text", "text": system_prompt, "cache_control": {"type": "ephemeral"}}] system = [{"type": "text", "text": system_prompt, "cache_control": {"type": "ephemeral"}}]
kwargs = {} kwargs: dict = {}
if is_reasoning_model(model.name): if is_reasoning_model(model.name):
thinking = {"type": "enabled", "budget_tokens": 1024} thinking = {"type": "enabled", "budget_tokens": 1024}
if headers: if headers:
@@ -572,7 +572,7 @@ class AnthropicOperatorAgent(OperatorAgent):
def get_tools(self, environment: EnvironmentType, current_state: EnvState) -> list[dict]: def get_tools(self, environment: EnvironmentType, current_state: EnvState) -> list[dict]:
"""Return the tools available for the Anthropic operator.""" """Return the tools available for the Anthropic operator."""
tools = [ tools: list[dict] = [
{ {
"type": self.model_default_tool("computer")["type"], "type": self.model_default_tool("computer")["type"],
"name": "computer", "name": "computer",

View File

@@ -166,6 +166,7 @@ class OpenAIOperatorAgent(OperatorAgent):
items_to_pop.append(idx) # Mark placeholder reasoning action result for removal items_to_pop.append(idx) # Mark placeholder reasoning action result for removal
continue continue
elif action_result["type"] == "computer_call" and action_result["status"] == "in_progress": elif action_result["type"] == "computer_call" and action_result["status"] == "in_progress":
if isinstance(result_content, dict):
result_content["status"] = "completed" # Mark in-progress actions as completed result_content["status"] = "completed" # Mark in-progress actions as completed
action_result["output"] = result_content action_result["output"] = result_content
else: else:

View File

@@ -5,7 +5,7 @@ import logging
import os import os
from typing import Optional, Set, Union from typing import Optional, Set, Union
from khoj.processor.operator.operator_actions import OperatorAction, Point from khoj.processor.operator.operator_actions import DragAction, OperatorAction, Point
from khoj.processor.operator.operator_environment_base import ( from khoj.processor.operator.operator_environment_base import (
Environment, Environment,
EnvState, EnvState,
@@ -246,6 +246,8 @@ class BrowserEnvironment(Environment):
logger.debug(f"Action: {action.type} to ({x},{y})") logger.debug(f"Action: {action.type} to ({x},{y})")
case "drag": case "drag":
if not isinstance(action, DragAction):
raise TypeError(f"Invalid action type for drag")
path = action.path path = action.path
if not path: if not path:
error = "Missing path for drag action" error = "Missing path for drag action"

View File

@@ -5,12 +5,11 @@ import io
import logging import logging
import platform import platform
import subprocess import subprocess
from pathlib import Path
from typing import Literal, Optional, Union from typing import Literal, Optional, Union
from PIL import Image, ImageDraw from PIL import Image, ImageDraw
from khoj.processor.operator.operator_actions import OperatorAction, Point from khoj.processor.operator.operator_actions import DragAction, OperatorAction, Point
from khoj.processor.operator.operator_environment_base import ( from khoj.processor.operator.operator_environment_base import (
Environment, Environment,
EnvState, EnvState,
@@ -272,19 +271,21 @@ class ComputerEnvironment(Environment):
logger.debug(f"Action: {action.type} to ({x},{y})") logger.debug(f"Action: {action.type} to ({x},{y})")
case "drag": case "drag":
path = action.path if not isinstance(action, DragAction):
if not path: raise TypeError("Invalid action type for drag")
drag_path = action.path
if not drag_path:
error = "Missing path for drag action" error = "Missing path for drag action"
else: else:
start_x, start_y = path[0].x, path[0].y start_x, start_y = drag_path[0].x, drag_path[0].y
await self._execute("moveTo", start_x, start_y, duration=0.1) await self._execute("moveTo", start_x, start_y, duration=0.1)
await self._execute("mouseDown") await self._execute("mouseDown")
for point in path[1:]: for point in drag_path[1:]:
await self._execute("moveTo", point.x, point.y, duration=0.05) await self._execute("moveTo", point.x, point.y, duration=0.05)
await self._execute("mouseUp") await self._execute("mouseUp")
self.mouse_pos = Point(x=path[-1].x, y=path[-1].y) self.mouse_pos = Point(x=drag_path[-1].x, y=drag_path[-1].y)
output = f"Drag along path starting at ({start_x},{start_y})" output = f"Drag along path starting at ({start_x},{start_y})"
logger.debug(f"Action: {action.type} with {len(path)} points") logger.debug(f"Action: {action.type} with {len(drag_path)} points")
case "mouse_down": case "mouse_down":
pyautogui_button = action.button.lower() if action.button else "left" pyautogui_button = action.button.lower() if action.button else "left"
@@ -352,9 +353,12 @@ class ComputerEnvironment(Environment):
case "text_editor_view": case "text_editor_view":
# View file contents # View file contents
path = action.path file_path = action.path
view_range = action.view_range view_range = action.view_range
escaped_path = path.replace("'", "'\"'\"'") # Type guard: path should be str for text editor actions
if not isinstance(file_path, str):
raise TypeError("Invalid path type for text editor view action")
escaped_path = file_path.replace("'", "'\"'\"'")
is_dir = await self._execute("os.path.isdir", escaped_path) is_dir = await self._execute("os.path.isdir", escaped_path)
if is_dir: if is_dir:
cmd = rf"find {escaped_path} -maxdepth 2 -not -path '*/\.*'" cmd = rf"find {escaped_path} -maxdepth 2 -not -path '*/\.*'"
@@ -373,18 +377,21 @@ class ComputerEnvironment(Environment):
result["output"] = f"{result['output'][:MAX_OUTPUT_LENGTH]}..." result["output"] = f"{result['output'][:MAX_OUTPUT_LENGTH]}..."
if result["success"]: if result["success"]:
if is_dir: if is_dir:
output = f"Here's the files and directories up to 2 levels deep in {path}, excluding hidden items:\n{result['output']}" output = f"Here's the files and directories up to 2 levels deep in {file_path}, excluding hidden items:\n{result['output']}"
else: else:
output = f"File contents of {path}:\n{result['output']}" output = f"File contents of {file_path}:\n{result['output']}"
else: else:
error = f"Failed to view file {path}: {result['error']}" error = f"Failed to view file {file_path}: {result['error']}"
logger.debug(f"Action: {action.type} for file {path}") logger.debug(f"Action: {action.type} for file {file_path}")
case "text_editor_create": case "text_editor_create":
# Create new file with contents # Create new file with contents
path = action.path file_path = action.path
file_text = action.file_text file_text = action.file_text
escaped_path = path.replace("'", "'\"'\"'") # Type guard: path should be str for text editor actions
if not isinstance(file_path, str):
raise TypeError("Invalid path type for text editor create action")
escaped_path = file_path.replace("'", "'\"'\"'")
escaped_content = file_text.replace("\t", " ").replace( escaped_content = file_text.replace("\t", " ").replace(
"'", "'\"'\"'" "'", "'\"'\"'"
) # Escape single quotes for shell ) # Escape single quotes for shell
@@ -392,19 +399,22 @@ class ComputerEnvironment(Environment):
result = await self._execute_shell_command(cmd) result = await self._execute_shell_command(cmd)
if result["success"]: if result["success"]:
output = f"Created file {path} with {len(file_text)} characters" output = f"Created file {file_path} with {len(file_text)} characters"
else: else:
error = f"Failed to create file {path}: {result['error']}" error = f"Failed to create file {file_path}: {result['error']}"
logger.debug(f"Action: {action.type} created file {path}") logger.debug(f"Action: {action.type} created file {file_path}")
case "text_editor_str_replace": case "text_editor_str_replace":
# Execute string replacement # Execute string replacement
path = action.path file_path = action.path
old_str = action.old_str old_str = action.old_str
new_str = action.new_str new_str = action.new_str
# Type guard: path should be str for text editor actions
if not isinstance(file_path, str):
raise TypeError("Invalid path type for text editor str_replace action")
# Use sed for string replacement, escaping special characters # Use sed for string replacement, escaping special characters
escaped_path = path.replace("'", "'\"'\"'") escaped_path = file_path.replace("'", "'\"'\"'")
escaped_old = ( escaped_old = (
old_str.replace("\t", " ") old_str.replace("\t", " ")
.replace("\\", "\\\\") .replace("\\", "\\\\")
@@ -424,18 +434,23 @@ class ComputerEnvironment(Environment):
result = await self._execute_shell_command(cmd) result = await self._execute_shell_command(cmd)
if result["success"]: if result["success"]:
output = f"Replaced '{old_str[:50]}...' with '{new_str[:50]}...' in {path}" output = f"Replaced '{old_str[:50]}...' with '{new_str[:50]}...' in {file_path}"
else: else:
error = f"Failed to replace text in {path}: {result['error']}" error = f"Failed to replace text in {file_path}: {result['error']}"
logger.debug(f"Action: {action.type} in file {path}") logger.debug(f"Action: {action.type} in file {file_path}")
case "text_editor_insert": case "text_editor_insert":
# Insert text after specified line # Insert text after specified line
path = action.path file_path = action.path
insert_line = action.insert_line insert_line = action.insert_line
new_str = action.new_str new_str = action.new_str
escaped_path = path.replace("'", "'\"'\"'") # Type guard: path should be str for text editor actions
if not isinstance(file_path, str):
error = "Invalid path type for text editor insert action.\n"
error += f"Failed to insert text in {file_path}: {result['error']}"
raise TypeError(error)
escaped_path = file_path.replace("'", "'\"'\"'")
escaped_content = ( escaped_content = (
new_str.replace("\t", " ") new_str.replace("\t", " ")
.replace("\\", "\\\\") .replace("\\", "\\\\")
@@ -446,10 +461,10 @@ class ComputerEnvironment(Environment):
result = await self._execute_shell_command(cmd) result = await self._execute_shell_command(cmd)
if result["success"]: if result["success"]:
output = f"Inserted text after line {insert_line} in {path}" output = f"Inserted text after line {insert_line} in {file_path}"
else: else:
error = f"Failed to insert text in {path}: {result['error']}" error = f"Failed to insert text in {file_path}: {result['error']}"
logger.debug(f"Action: {action.type} at line {insert_line} in file {path}") logger.debug(f"Action: {action.type} at line {insert_line} in file {file_path}")
case _: case _:
error = f"Unrecognized action type: {action.type}" error = f"Unrecognized action type: {action.type}"
@@ -457,6 +472,8 @@ class ComputerEnvironment(Environment):
except KeyboardInterrupt: except KeyboardInterrupt:
error = "User interrupt. Operation aborted." error = "User interrupt. Operation aborted."
logger.error(error) logger.error(error)
except TypeError as e:
logger.error(f"Error executing action {action.type}: {e}")
except Exception as e: except Exception as e:
error = f"Unexpected error executing action {action.type}: {str(e)}" error = f"Unexpected error executing action {action.type}: {str(e)}"
logger.exception( logger.exception(