Add pages visited via browser operator to references returned to clients

This commit is contained in:
Debanjum
2025-05-04 01:27:18 -06:00
parent e71575ad1a
commit a1c9c6b2e3
3 changed files with 38 additions and 6 deletions

View File

@@ -6,7 +6,7 @@ import os
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from copy import deepcopy from copy import deepcopy
from datetime import datetime from datetime import datetime
from typing import Callable, List, Literal, Optional, Union from typing import Callable, List, Literal, Optional, Set, Union
import requests import requests
from anthropic.types.beta import BetaContentBlock, BetaMessage from anthropic.types.beta import BetaContentBlock, BetaMessage
@@ -224,6 +224,8 @@ class BrowserEnvironment(Environment):
self.page: Optional[Page] = None self.page: Optional[Page] = None
self.width: int = 1024 self.width: int = 1024
self.height: int = 768 self.height: int = 768
self.visited_urls: Set[str] = set()
self.excluded_urls = {"about:blank", "https://duckduckgo.com", "https://www.bing.com", "https://www.google.com"}
async def start(self, width: int = 1024, height: int = 768) -> None: async def start(self, width: int = 1024, height: int = 768) -> None:
self.width = width self.width = width
@@ -242,6 +244,16 @@ class BrowserEnvironment(Environment):
default_context = self.browser.contexts[0] if self.browser.contexts else await self.browser.new_context() default_context = self.browser.contexts[0] if self.browser.contexts else await self.browser.new_context()
self.page = default_context.pages[0] if default_context.pages else await default_context.new_page() self.page = default_context.pages[0] if default_context.pages else await default_context.new_page()
# Define a handler for page load events to capture URLs
async def handle_load(loaded_page: Page):
    """Record the URL of a page that has finished loading.

    Registered as a "load" event handler. Reads ``self.excluded_urls``
    and adds to ``self.visited_urls`` on the enclosing environment.

    Args:
        loaded_page: The page that fired the load event; only its
            ``url`` attribute is read.
    """
    url = loaded_page.url
    if not url:
        return

    # A browser reports a bare origin in serialized form, i.e. with a
    # trailing slash ("https://www.google.com/"), while the exclusion
    # entries are written without one. Compare both spellings so the
    # excluded landing pages are actually skipped. Only the comparison is
    # normalized; the URL is stored exactly as the page reported it.
    if url in self.excluded_urls or url.rstrip("/") in self.excluded_urls:
        return

    # Log and record each distinct URL only once.
    if url not in self.visited_urls:
        logger.debug(f"Page loaded: {url}")
        self.visited_urls.add(url)
# Listen for load events on the main page
self.page.on("load", handle_load)
# Define a handler for new pages # Define a handler for new pages
async def handle_new_page(new_page: Page): async def handle_new_page(new_page: Page):
# Get the target URL of the new page # Get the target URL of the new page
@@ -1211,4 +1223,7 @@ async def operate_browser(
if environment and not safety_check_message: # Don't close browser if safety check pending if environment and not safety_check_message: # Don't close browser if safety check pending
await environment.close() await environment.close()
yield safety_check_message or response yield {
"text": safety_check_message or response,
"webpages": [{"link": url, "snippet": ""} for url in environment.visited_urls],
}

View File

@@ -1233,8 +1233,16 @@ async def chat(
): ):
if isinstance(result, dict) and ChatEvent.STATUS in result: if isinstance(result, dict) and ChatEvent.STATUS in result:
yield result[ChatEvent.STATUS] yield result[ChatEvent.STATUS]
elif isinstance(result, str): else:
operator_results.append(result) operator_results.append(result["text"])
# Add webpages visited while operating browser to references
if result.get("webpages"):
if not online_results.get(defiltered_query):
online_results[defiltered_query] = {"webpages": result["webpages"]}
elif not online_results[defiltered_query].get("webpages"):
online_results[defiltered_query]["webpages"] = result["webpages"]
else:
online_results[defiltered_query]["webpages"] += result["webpages"]
except ValueError as e: except ValueError as e:
program_execution_context.append(f"Browser operation error: {e}") program_execution_context.append(f"Browser operation error: {e}")
logger.warning(f"Failed to operate browser with {e}", exc_info=True) logger.warning(f"Failed to operate browser with {e}", exc_info=True)

View File

@@ -416,9 +416,18 @@ async def execute_information_collection(
): ):
if isinstance(result, dict) and ChatEvent.STATUS in result: if isinstance(result, dict) and ChatEvent.STATUS in result:
yield result[ChatEvent.STATUS] yield result[ChatEvent.STATUS]
elif isinstance(result, str): else:
operator_results = result # type: ignore operator_results = result["text"] # type: ignore
this_iteration.operatorContext = operator_results this_iteration.operatorContext = operator_results
# Add webpages visited while operating browser to references
if result.get("webpages"):
if not online_results.get(this_iteration.query):
online_results[this_iteration.query] = {"webpages": result["webpages"]}
elif not online_results[this_iteration.query].get("webpages"):
online_results[this_iteration.query]["webpages"] = result["webpages"]
else:
online_results[this_iteration.query]["webpages"] += result["webpages"]
this_iteration.onlineContext = online_results
except Exception as e: except Exception as e:
this_iteration.warning = f"Error operating browser: {e}" this_iteration.warning = f"Error operating browser: {e}"
logger.error(this_iteration.warning, exc_info=True) logger.error(this_iteration.warning, exc_info=True)