Remove deprecated prompt for grounding model to choose goto, back func

Goto and back functions are chosen by the visual reasoning model for
increased reliability in selecting those tools. The ui-tars grounding
model seems too tuned to use a specific set of tools.
This commit is contained in:
Debanjum
2025-05-10 16:27:53 -06:00
parent 1442a4f6fb
commit 59e0e092b0
2 changed files with 2 additions and 4 deletions

View File

@@ -38,7 +38,6 @@ class GroundingAgentUitars:
UITARS_USR_PROMPT_THOUGHT = """
You are a GUI agent. You are given a task and a screenshot of the web browser tab you operate. You need to perform the next action to complete the task.
You control a single tab in a Chromium browser. You cannot access the OS, filesystem, the application window or the addressbar.
ALWAYS use the `goto()` function to navigate to a specific URL. Ctrl+t, Ctrl+w, Ctrl+q, Ctrl+Shift+T, Ctrl+Shift+W are not allowed.
## Output Format
```
@@ -66,8 +65,6 @@ class GroundingAgentUitars:
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
wait() #Sleep for 5s and take a screenshot to check for any changes.
goto(url='') # ALWAYS use the goto function to navigate to a specific URL.
back() # Use this to go back to the previous page.
finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format.
""".lstrip()

View File

@@ -79,7 +79,7 @@ async def operate_browser(
with timer(f"Operating browser with {reasoning_model.model_type} {reasoning_model.name}", logger):
while iterations < max_iterations and not task_completed:
if cancellation_event and cancellation_event.is_set():
logger.info(f"Browser operator cancelled by client disconnect")
logger.debug(f"Browser operator cancelled by client disconnect")
break
iterations += 1
@@ -94,6 +94,7 @@ async def operate_browser(
env_steps: List[EnvStepResult] = []
for action in agent_result.actions:
if cancellation_event and cancellation_event.is_set():
logger.debug(f"Browser operator cancelled by client disconnect")
break
# Handle request for user action and break the loop
if isinstance(action, RequestUserAction):