diff --git a/src/khoj/processor/operator/grounding_agent_uitars.py b/src/khoj/processor/operator/grounding_agent_uitars.py index 4ba2578b..1646cd5a 100644 --- a/src/khoj/processor/operator/grounding_agent_uitars.py +++ b/src/khoj/processor/operator/grounding_agent_uitars.py @@ -38,7 +38,6 @@ class GroundingAgentUitars: UITARS_USR_PROMPT_THOUGHT = """ You are a GUI agent. You are given a task and a screenshot of the web browser tab you operate. You need to perform the next action to complete the task. You control a single tab in a Chromium browser. You cannot access the OS, filesystem, the application window or the addressbar. - ALWAYS use the `goto()` function to navigate to a specific URL. Ctrl+t, Ctrl+w, Ctrl+q, Ctrl+Shift+T, Ctrl+Shift+W are not allowed. ## Output Format ``` @@ -66,8 +65,6 @@ class GroundingAgentUitars: type(content='') #If you want to submit your input, use "\\n" at the end of `content`. scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left') wait() #Sleep for 5s and take a screenshot to check for any changes. - goto(url='') # ALWAYS use the goto function to navigate to a specific URL. - back() # Use this to go back to the previous page. finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format. """.lstrip() diff --git a/src/khoj/processor/operator/operate_browser.py b/src/khoj/processor/operator/operate_browser.py index cfd677e7..ce69a6fd 100644 --- a/src/khoj/processor/operator/operate_browser.py +++ b/src/khoj/processor/operator/operate_browser.py @@ -79,7 +79,7 @@ async def operate_browser( with timer(f"Operating browser with {reasoning_model.model_type} {reasoning_model.name}", logger): while iterations < max_iterations and not task_completed: if cancellation_event and cancellation_event.is_set(): - logger.info(f"Browser operator cancelled by client disconnect") + logger.debug(f"Browser operator cancelled by client disconnect") break iterations += 1 @@ -94,6 +94,7 @@ async def operate_browser( env_steps: List[EnvStepResult] = [] for action in agent_result.actions: if cancellation_event and cancellation_event.is_set(): + logger.debug(f"Browser operator cancelled by client disconnect") break # Handle request for user action and break the loop if isinstance(action, RequestUserAction):