mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-06 05:39:12 +00:00
Make back action in browser environment use goto to avoid timeouts
For some reason, the page.go_back() action in Playwright had a much higher propensity to time out. Use page.goto() instead to reduce these page traversal timeouts. This requires tracking navigation history.
This commit is contained in:
@@ -27,6 +27,7 @@ class BrowserEnvironment(Environment):
|
|||||||
self.height: int = 768
|
self.height: int = 768
|
||||||
self.visited_urls: Set[str] = set()
|
self.visited_urls: Set[str] = set()
|
||||||
self.excluded_urls = {"about:blank", "https://duckduckgo.com", "https://www.bing.com", "https://www.google.com"}
|
self.excluded_urls = {"about:blank", "https://duckduckgo.com", "https://www.bing.com", "https://www.google.com"}
|
||||||
|
self.navigation_history: list[str] = []
|
||||||
|
|
||||||
async def start(self, width: int = 1024, height: int = 768) -> None:
|
async def start(self, width: int = 1024, height: int = 768) -> None:
|
||||||
self.width = width
|
self.width = width
|
||||||
@@ -48,7 +49,13 @@ class BrowserEnvironment(Environment):
|
|||||||
# Define a handler for page load events to capture URLs
|
# Define a handler for page load events to capture URLs
|
||||||
async def handle_load(loaded_page: Page):
|
async def handle_load(loaded_page: Page):
|
||||||
url = loaded_page.url
|
url = loaded_page.url
|
||||||
if url and url not in self.excluded_urls and url not in self.visited_urls:
|
if not url:
|
||||||
|
return
|
||||||
|
|
||||||
|
if not self.navigation_history or self.navigation_history[-1] != url:
|
||||||
|
self.navigation_history.append(url)
|
||||||
|
|
||||||
|
if url not in self.excluded_urls and url not in self.visited_urls:
|
||||||
logger.debug(f"Page loaded: {url}")
|
logger.debug(f"Page loaded: {url}")
|
||||||
self.visited_urls.add(url)
|
self.visited_urls.add(url)
|
||||||
|
|
||||||
@@ -252,9 +259,15 @@ class BrowserEnvironment(Environment):
|
|||||||
logger.debug(f"Action: {action.type} to {url}")
|
logger.debug(f"Action: {action.type} to {url}")
|
||||||
|
|
||||||
case "back":
|
case "back":
|
||||||
await self.page.go_back()
|
if len(self.navigation_history) > 1:
|
||||||
output = "Navigated back"
|
self.navigation_history.pop()
|
||||||
logger.debug(f"Action: {action.type}")
|
previous_url = self.navigation_history[-1]
|
||||||
|
await self.page.goto(previous_url)
|
||||||
|
output = f"Navigated back to {previous_url}"
|
||||||
|
else:
|
||||||
|
output = "No previous URL to navigate back"
|
||||||
|
previous_url = "about:blank"
|
||||||
|
logger.debug(f"Action: {action.type} to {previous_url}")
|
||||||
|
|
||||||
case _:
|
case _:
|
||||||
error = f"Unrecognized action type: {action.type}"
|
error = f"Unrecognized action type: {action.type}"
|
||||||
|
|||||||
Reference in New Issue
Block a user