Patch summary:
  * operator_actions.py: widen pointer/scroll coordinate fields from int to
    float, add KeyUpAction and KeyDownAction models, and add KeyDownAction,
    KeyUpAction and RequestUserAction to the OperatorAction union.
  * operator_environment_browser.py: handle "key_down" / "key_up" actions by
    calling page.keyboard.down(key) / page.keyboard.up(key).
NOTE(review): line breaks were restored from the hunk line counts; the
indentation depth inside the second file's hunk is reconstructed - confirm
against the target file before applying.

diff --git a/src/khoj/processor/operator/operator_actions.py b/src/khoj/processor/operator/operator_actions.py
index 725d8d19..435083cd 100644
--- a/src/khoj/processor/operator/operator_actions.py
+++ b/src/khoj/processor/operator/operator_actions.py
@@ -5,8 +5,8 @@ from pydantic import BaseModel
 
 
 class Point(BaseModel):
-    x: int
-    y: int
+    x: float
+    y: float
 
 
 class BaseAction(BaseModel):
@@ -15,32 +15,32 @@ class BaseAction(BaseModel):
 
 class ClickAction(BaseAction):
     type: Literal["click"] = "click"
-    x: int
-    y: int
+    x: float
+    y: float
     button: Literal["left", "right", "middle", "wheel"] = "left"
     modifiers: str = None
 
 
 class DoubleClickAction(BaseAction):
     type: Literal["double_click"] = "double_click"
-    x: int
-    y: int
+    x: float
+    y: float
 
 
 class TripleClickAction(BaseAction):
     type: Literal["triple_click"] = "triple_click"
-    x: int
-    y: int
+    x: float
+    y: float
 
 
 class ScrollAction(BaseAction):
     type: Literal["scroll"] = "scroll"
-    x: Optional[int] = None
-    y: Optional[int] = None
-    scroll_x: Optional[int] = None
-    scroll_y: Optional[int] = None
+    x: Optional[float] = None
+    y: Optional[float] = None
+    scroll_x: Optional[float] = None
+    scroll_y: Optional[float] = None
     scroll_direction: Optional[Literal["up", "down", "left", "right"]] = None
-    scroll_amount: Optional[int] = 2
+    scroll_amount: Optional[float] = 2.0
 
 
 class KeypressAction(BaseAction):
@@ -64,8 +64,8 @@ class ScreenshotAction(BaseAction):
 
 class MoveAction(BaseAction):
     type: Literal["move"] = "move"
-    x: int
-    y: int
+    x: float
+    y: float
 
 
 class DragAction(BaseAction):
@@ -89,6 +89,16 @@ class HoldKeyAction(BaseAction):
     duration: float = 1.0
 
 
+class KeyUpAction(BaseAction):
+    type: Literal["key_up"] = "key_up"
+    key: str
+
+
+class KeyDownAction(BaseAction):
+    type: Literal["key_down"] = "key_down"
+    key: str
+
+
 class CursorPositionAction(BaseAction):
     type: Literal["cursor_position"] = "cursor_position"
 
@@ -123,7 +133,10 @@ OperatorAction = Union[
     MouseDownAction,
     MouseUpAction,
     HoldKeyAction,
+    KeyDownAction,
+    KeyUpAction,
     CursorPositionAction,
     GotoAction,
     BackAction,
+    RequestUserAction,
 ]
diff --git a/src/khoj/processor/operator/operator_environment_browser.py b/src/khoj/processor/operator/operator_environment_browser.py
index 6b817273..7edb1a5d 100644
--- a/src/khoj/processor/operator/operator_environment_browser.py
+++ b/src/khoj/processor/operator/operator_environment_browser.py
@@ -243,6 +243,18 @@ class BrowserEnvironment(Environment):
                     output = f"Held key{'s' if len(keys) > 1 else ''} {keys_to_parse} for {duration} seconds"
                     logger.debug(f"Action: {action.type} '{keys_to_parse}' for {duration}s")
 
+                case "key_down":
+                    key = action.key
+                    await self.page.keyboard.down(key)
+                    output = f"Key down: {key}"
+                    logger.debug(f"Action: {action.type} {key}")
+
+                case "key_up":
+                    key = action.key
+                    await self.page.keyboard.up(key)
+                    output = f"Key up: {key}"
+                    logger.debug(f"Action: {action.type} {key}")
+
                 case "cursor_position":
                     # Playwright doesn't directly expose mouse position easily without JS injection
                     # Returning a placeholder for now