mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-05 05:39:11 +00:00
Add pages visited via browser operator to references returned to clients
This commit is contained in:
@@ -6,7 +6,7 @@ import os
|
||||
from abc import ABC, abstractmethod
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
from typing import Callable, List, Literal, Optional, Union
|
||||
from typing import Callable, List, Literal, Optional, Set, Union
|
||||
|
||||
import requests
|
||||
from anthropic.types.beta import BetaContentBlock, BetaMessage
|
||||
@@ -224,6 +224,8 @@ class BrowserEnvironment(Environment):
|
||||
self.page: Optional[Page] = None
|
||||
self.width: int = 1024
|
||||
self.height: int = 768
|
||||
self.visited_urls: Set[str] = set()
|
||||
self.excluded_urls = {"about:blank", "https://duckduckgo.com", "https://www.bing.com", "https://www.google.com"}
|
||||
|
||||
async def start(self, width: int = 1024, height: int = 768) -> None:
|
||||
self.width = width
|
||||
@@ -242,6 +244,16 @@ class BrowserEnvironment(Environment):
|
||||
default_context = self.browser.contexts[0] if self.browser.contexts else await self.browser.new_context()
|
||||
self.page = default_context.pages[0] if default_context.pages else await default_context.new_page()
|
||||
|
||||
# Define a handler for page load events to capture URLs
|
||||
async def handle_load(loaded_page: Page):
    """Record the URL of a page that has finished loading.

    The URL is added to ``self.visited_urls`` unless it is empty, already
    recorded, or listed in ``self.excluded_urls``.

    Args:
        loaded_page: The page that fired the ``load`` event.
    """
    url = loaded_page.url
    if not url:
        return
    # Browsers report a bare origin with a trailing slash
    # ("https://www.google.com/"), while the excluded entries are written
    # without one. Compare both forms so those home pages are skipped.
    if url in self.excluded_urls or url.rstrip("/") in self.excluded_urls:
        return
    if url in self.visited_urls:
        return
    logger.debug(f"Page loaded: {url}")
    # Store the URL exactly as reported so references link to the real page.
    self.visited_urls.add(url)
|
||||
|
||||
# Listen for load events on the main page
|
||||
self.page.on("load", handle_load)
|
||||
|
||||
# Define a handler for new pages
|
||||
async def handle_new_page(new_page: Page):
|
||||
# Get the target URL of the new page
|
||||
@@ -1211,4 +1223,7 @@ async def operate_browser(
|
||||
if environment and not safety_check_message: # Don't close browser if safety check pending
|
||||
await environment.close()
|
||||
|
||||
yield safety_check_message or response
|
||||
yield {
|
||||
"text": safety_check_message or response,
|
||||
"webpages": [{"link": url, "snippet": ""} for url in environment.visited_urls],
|
||||
}
|
||||
|
||||
@@ -1233,8 +1233,16 @@ async def chat(
|
||||
):
|
||||
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
||||
yield result[ChatEvent.STATUS]
|
||||
elif isinstance(result, str):
|
||||
operator_results.append(result)
|
||||
else:
|
||||
operator_results.append(result["text"])
|
||||
# Add webpages visited while operating browser to references
|
||||
if result.get("webpages"):
|
||||
if not online_results.get(defiltered_query):
|
||||
online_results[defiltered_query] = {"webpages": result["webpages"]}
|
||||
elif not online_results[defiltered_query].get("webpages"):
|
||||
online_results[defiltered_query]["webpages"] = result["webpages"]
|
||||
else:
|
||||
online_results[defiltered_query]["webpages"] += result["webpages"]
|
||||
except ValueError as e:
|
||||
program_execution_context.append(f"Browser operation error: {e}")
|
||||
logger.warning(f"Failed to operate browser with {e}", exc_info=True)
|
||||
|
||||
@@ -416,9 +416,18 @@ async def execute_information_collection(
|
||||
):
|
||||
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
||||
yield result[ChatEvent.STATUS]
|
||||
elif isinstance(result, str):
|
||||
operator_results = result # type: ignore
|
||||
else:
|
||||
operator_results = result["text"] # type: ignore
|
||||
this_iteration.operatorContext = operator_results
|
||||
# Add webpages visited while operating browser to references
|
||||
if result.get("webpages"):
|
||||
if not online_results.get(this_iteration.query):
|
||||
online_results[this_iteration.query] = {"webpages": result["webpages"]}
|
||||
elif not online_results[this_iteration.query].get("webpages"):
|
||||
online_results[this_iteration.query]["webpages"] = result["webpages"]
|
||||
else:
|
||||
online_results[this_iteration.query]["webpages"] += result["webpages"]
|
||||
this_iteration.onlineContext = online_results
|
||||
except Exception as e:
|
||||
this_iteration.warning = f"Error operating browser: {e}"
|
||||
logger.error(this_iteration.warning, exc_info=True)
|
||||
|
||||
Reference in New Issue
Block a user