From 9f75622346fc361a8e686ebcd457801271dc6175 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Sat, 29 Mar 2025 18:03:02 +0530 Subject: [PATCH] Allow browser operator to use browser with existing context over CDP Give the Khoj browser operator access to browser with existing context (auth, cookies etc.) by starting it with CDP enabled. Process: 1. Start Browser with CDP enabled: `Edge/Chromium/Chrome --remote-debugging-port=9222` 2. Set the KHOJ_CDP_URL env var to the CDP url of the browser to use. 3. Start Khoj and ask it to get browser based work done with operator + research mode --- .../processor/operator/browser_operator.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/khoj/processor/operator/browser_operator.py b/src/khoj/processor/operator/browser_operator.py index e4525308..0a8a51c5 100644 --- a/src/khoj/processor/operator/browser_operator.py +++ b/src/khoj/processor/operator/browser_operator.py @@ -2,6 +2,7 @@ import asyncio import base64 import json import logging +import os from datetime import datetime from typing import Callable, List, Literal, Optional @@ -60,7 +61,7 @@ async def operate_browser( playwright, browser, page = await start_browser(width, height) # Operate the browser - max_iterations = 30 + max_iterations = 40 with timer(f"Operating browser with {chat_model.model_type} {chat_model.name}", logger): try: if chat_model.model_type == ChatModel.ModelType.OPENAI: @@ -114,11 +115,18 @@ async def operate_browser( async def start_browser(width: int = 1024, height: int = 768): playwright = await async_playwright().start() - launch_args = [f"--window-size={width},{height}", "--disable-extensions", "--disable-file-system"] - browser = await playwright.chromium.launch(chromium_sandbox=True, headless=False, args=launch_args, env={}) + if cdp_url := os.getenv("KHOJ_CDP_URL"): + browser = await playwright.chromium.connect_over_cdp(cdp_url) + else: + launch_args = [f"--window-size={width},{height}", 
"--disable-extensions", "--disable-file-system"] + browser = await playwright.chromium.launch(chromium_sandbox=True, headless=False, args=launch_args, env={}) - page = await browser.new_page() - await page.goto("https://duckduckgo.com") + default_context = browser.contexts[0] if browser.contexts else await browser.new_context() + + page = default_context.pages[0] if default_context.pages else await default_context.new_page() + # If page url is blank, navigate to DuckDuckGo + if page.url == "about:blank": + await page.goto("https://duckduckgo.com") await page.set_viewport_size({"width": width, "height": height}) return playwright, browser, page