diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index e6975a0a..20c3ce0d 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -855,7 +855,7 @@ Khoj: infer_webpages_to_read = PromptTemplate.from_template( """ -You are Khoj, an advanced web page reading assistant. You are to construct **up to three, valid** webpage urls to read before answering the user's question. +You are Khoj, an advanced web page reading assistant. You are to construct **up to {max_webpages}, valid** webpage urls to read before answering the user's question. - You will receive the conversation history as context. - Add as much context from the previous questions and answers as required to construct the webpage urls. - Use multiple web page urls if required to retrieve the relevant information. diff --git a/src/khoj/processor/tools/online_search.py b/src/khoj/processor/tools/online_search.py index 33cc3056..4234b32c 100644 --- a/src/khoj/processor/tools/online_search.py +++ b/src/khoj/processor/tools/online_search.py @@ -55,9 +55,6 @@ OLOSTEP_QUERY_PARAMS = { "expandHtml": "False", } -DEFAULT_MAX_WEBPAGES_TO_READ = 1 -MAX_WEBPAGES_TO_INFER = 10 - async def search_online( query: str, @@ -66,7 +63,7 @@ async def search_online( user: KhojUser, send_status_func: Optional[Callable] = None, custom_filters: List[str] = [], - max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ, + max_webpages_to_read: int = 1, query_images: List[str] = None, previous_subqueries: Set = set(), agent: Agent = None, @@ -282,7 +279,7 @@ async def read_webpages( send_status_func: Optional[Callable] = None, query_images: List[str] = None, agent: Agent = None, - max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ, + max_webpages_to_read: int = 1, query_files: str = None, tracer: dict = {}, ): @@ -290,6 +287,7 @@ async def read_webpages( logger.info(f"Inferring web pages to read") urls = await infer_webpage_urls( query, + max_webpages_to_read, conversation_history, location, user, @@ -299,9 +297,6 @@ async def read_webpages( tracer=tracer, ) - # Get the top 10 web pages to read - urls = urls[:max_webpages_to_read] - logger.info(f"Reading web pages at: {urls}") if send_status_func: webpage_links_str = "\n- " + "\n- ".join(list(urls)) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index e1961b96..2f709ffb 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -1106,6 +1106,7 @@ async def chat( location, user, partial(send_event, ChatEvent.STATUS), + max_webpages_to_read=1, query_images=uploaded_images, agent=agent, query_files=attached_file_context, diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index c7e71b09..acca60b0 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -450,6 +450,7 @@ async def aget_data_sources_and_output_format( async def infer_webpage_urls( q: str, + max_webpages: int, conversation_history: dict, location_data: LocationData, user: KhojUser, @@ -471,9 +472,10 @@ async def infer_webpage_urls( ) online_queries_prompt = prompts.infer_webpages_to_read.format( - current_date=utc_date, query=q, + max_webpages=max_webpages, chat_history=chat_history, + current_date=utc_date, location=location, username=username, personality_context=personality_context, @@ -502,7 +504,7 @@ async def infer_webpage_urls( if len(valid_unique_urls) == 0: logger.error(f"No valid URLs found in response: {response}") return [] - return list(valid_unique_urls) + return list(valid_unique_urls)[:max_webpages] except Exception: raise ValueError(f"Invalid list of urls: {response}") diff --git a/src/khoj/routers/research.py b/src/khoj/routers/research.py index d534e812..b662dca9 100644 --- a/src/khoj/routers/research.py +++ b/src/khoj/routers/research.py @@ -321,6 +321,7 @@ async def execute_information_collection( location, user, send_status_func, + max_webpages_to_read=1, query_images=query_images, agent=agent, tracer=tracer,