From 3441783d5bc0934cea3046b18cb68b02dab1e190 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Sat, 30 Aug 2025 00:22:26 -0700 Subject: [PATCH] Disable web search tool if no search engine configured by admin Webpage read is gated behind having a web search engine configured for now. It can later be decoupled from web search and depend on whether any web scraper is configured. --- src/khoj/routers/helpers.py | 3 +++ src/khoj/routers/research.py | 5 +++++ src/khoj/utils/helpers.py | 11 +++++++++++ 3 files changed, 19 insertions(+) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 6c3eac0f..3bfbdb4c 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -121,6 +121,7 @@ from khoj.utils.helpers import ( is_none_or_empty, is_operator_enabled, is_valid_url, + is_web_search_enabled, log_telemetry, mode_descriptions_for_llm, timer, @@ -367,6 +368,8 @@ async def aget_data_sources_and_output_format( continue if source == ConversationCommand.Operator and not is_operator_enabled(): continue + if source in [ConversationCommand.Online, ConversationCommand.Webpage] and not is_web_search_enabled(): + continue source_options[source.value] = description if len(agent_sources) == 0 or source.value in agent_sources: source_options_str += f'- "{source.value}": "{description}"\n' diff --git a/src/khoj/routers/research.py b/src/khoj/routers/research.py index 43abaaf0..d242c5b9 100644 --- a/src/khoj/routers/research.py +++ b/src/khoj/routers/research.py @@ -36,6 +36,7 @@ from khoj.utils.helpers import ( dict_to_tuple, is_none_or_empty, is_operator_enabled, + is_web_search_enabled, timer, tools_for_research_llm, truncate_code_context, @@ -96,6 +97,7 @@ async def apick_next_tool( ConversationCommand.ViewFile, ConversationCommand.ListFiles, ] + web_research_tools = [ConversationCommand.SearchWeb, ConversationCommand.ReadWebpage] input_tools_to_research_tools = { ConversationCommand.Notes.value: [tool.value for tool in document_research_tools], 
ConversationCommand.Webpage.value: [ConversationCommand.ReadWebpage.value], @@ -115,6 +117,9 @@ async def apick_next_tool( # Skip showing document related tools if user has no documents if tool in document_research_tools and not user_has_entries: continue + # Skip showing web search tool if agent has no access to internet + if tool in web_research_tools and not is_web_search_enabled(): + continue if tool == ConversationCommand.SemanticSearchFiles: description = tool_data.description.format(max_search_queries=max_document_searches) elif tool == ConversationCommand.ReadWebpage: diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index e8c063d6..a43521d2 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -794,6 +794,17 @@ def is_internet_connected(): return False +def is_web_search_enabled(): + """ + Check if web search tool is enabled. + Set API key via env var for a supported search engine to enable it. + """ + return any( + not is_none_or_empty(os.getenv(api_key)) + for api_key in ["GOOGLE_SEARCH_API_KEY", "SERPER_DEV_API_KEY", "JINA_API_KEY", "FIRECRAWL_API_KEY"] + ) + + def is_internal_url(url: str) -> bool: """ Check if a URL is likely to be internal/non-public.