From 1e390325d2d668a44ac6caa273d82fc287475778 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 10 Oct 2024 02:19:08 -0700 Subject: [PATCH] Let research chat director decide which webpage to read, if any Make the number of webpages to read automatically on search_online configurable via an argument. Set it to default to 1, so other callers of the function are unaffected. But the iterative chat director can still decide which, if any, webpages to read based on the online search it performs. --- src/khoj/processor/tools/online_search.py | 6 +++--- src/khoj/routers/research.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/khoj/processor/tools/online_search.py b/src/khoj/processor/tools/online_search.py index 840d8a81..c2e051d6 100644 --- a/src/khoj/processor/tools/online_search.py +++ b/src/khoj/processor/tools/online_search.py @@ -46,8 +46,7 @@ OLOSTEP_QUERY_PARAMS = { "expandHtml": "False", } -# TODO: Should this be 0 to let advanced model decide which web pages to read? 
-MAX_WEBPAGES_TO_READ = 1 +DEFAULT_MAX_WEBPAGES_TO_READ = 1 async def search_online( @@ -58,6 +57,7 @@ async def search_online( subscribed: bool = False, send_status_func: Optional[Callable] = None, custom_filters: List[str] = [], + max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ, uploaded_image_url: str = None, agent: Agent = None, ): @@ -91,7 +91,7 @@ async def search_online( webpages = { (organic.get("link"), subquery, organic.get("content")) for subquery in response_dict - for organic in response_dict[subquery].get("organic", [])[:MAX_WEBPAGES_TO_READ] + for organic in response_dict[subquery].get("organic", [])[:max_webpages_to_read] if "answerBox" not in response_dict[subquery] } diff --git a/src/khoj/routers/research.py b/src/khoj/routers/research.py index 1ada9e7a..de47419a 100644 --- a/src/khoj/routers/research.py +++ b/src/khoj/routers/research.py @@ -199,6 +199,7 @@ async def execute_information_collection( subscribed, send_status_func, [], + max_webpages_to_read=0, uploaded_image_url=uploaded_image_url, agent=agent, ):