mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Remove deprecated prompt for grounding model to choose goto, back func
Goto and back functions are chosen by the visual reasoning model for increased reliability in selecting those tools. The ui-tars grounding model seems too narrowly tuned to a specific set of tools.
This commit is contained in:
@@ -38,7 +38,6 @@ class GroundingAgentUitars:
|
||||
UITARS_USR_PROMPT_THOUGHT = """
|
||||
You are a GUI agent. You are given a task and a screenshot of the web browser tab you operate. You need to perform the next action to complete the task.
|
||||
You control a single tab in a Chromium browser. You cannot access the OS, filesystem, the application window or the addressbar.
|
||||
ALWAYS use the `goto()` function to navigate to a specific URL. Ctrl+t, Ctrl+w, Ctrl+q, Ctrl+Shift+T, Ctrl+Shift+W are not allowed.
|
||||
|
||||
## Output Format
|
||||
```
|
||||
@@ -66,8 +65,6 @@ class GroundingAgentUitars:
|
||||
type(content='') #If you want to submit your input, use "\\n" at the end of `content`.
|
||||
scroll(start_box='<|box_start|>(x1,y1)<|box_end|>', direction='down or up or right or left')
|
||||
wait() #Sleep for 5s and take a screenshot to check for any changes.
|
||||
goto(url='') # ALWAYS use the goto function to navigate to a specific URL.
|
||||
back() # Use this to go back to the previous page.
|
||||
finished(content='xxx') # Use escape characters \\', \\", and \\n in content part to ensure we can parse the content in normal python string format.
|
||||
""".lstrip()
|
||||
|
||||
|
||||
@@ -79,7 +79,7 @@ async def operate_browser(
|
||||
with timer(f"Operating browser with {reasoning_model.model_type} {reasoning_model.name}", logger):
|
||||
while iterations < max_iterations and not task_completed:
|
||||
if cancellation_event and cancellation_event.is_set():
|
||||
logger.info(f"Browser operator cancelled by client disconnect")
|
||||
logger.debug(f"Browser operator cancelled by client disconnect")
|
||||
break
|
||||
|
||||
iterations += 1
|
||||
@@ -94,6 +94,7 @@ async def operate_browser(
|
||||
env_steps: List[EnvStepResult] = []
|
||||
for action in agent_result.actions:
|
||||
if cancellation_event and cancellation_event.is_set():
|
||||
logger.debug(f"Browser operator cancelled by client disconnect")
|
||||
break
|
||||
# Handle request for user action and break the loop
|
||||
if isinstance(action, RequestUserAction):
|
||||
|
||||
Reference in New Issue
Block a user