From d53ede604ce03d55983077ad11cd2699275dcc4d Mon Sep 17 00:00:00 2001 From: Debanjum Date: Sun, 31 Aug 2025 13:17:05 -0700 Subject: [PATCH] Only enable web search with Searxng if KHOJ_SEARXNG_URL env var set Instead of implicitly assuming it is available, as: - For pip install searxng has to be explicitly set up to work - For docker install we already explicitly set it up and set the KHOJ_SEARXNG_URL env var Also check that the Searxng URL is unset before disabling web search tools, now that Searxng requires explicit enablement. --- src/khoj/processor/tools/online_search.py | 14 ++++++++------ src/khoj/utils/helpers.py | 12 +++++++++--- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/khoj/processor/tools/online_search.py b/src/khoj/processor/tools/online_search.py index a6cb7e92..d30c4a76 100644 --- a/src/khoj/processor/tools/online_search.py +++ b/src/khoj/processor/tools/online_search.py @@ -47,6 +47,8 @@ JINA_API_KEY = os.getenv("JINA_API_KEY") FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY") FIRECRAWL_USE_LLM_EXTRACT = is_env_var_true("FIRECRAWL_USE_LLM_EXTRACT") +SEARXNG_URL = os.getenv("KHOJ_SEARXNG_URL") + # Timeout for web search and webpage read HTTP requests WEBPAGE_REQUEST_TIMEOUT = 60 # seconds @@ -120,8 +122,9 @@ async def search_online( if JINA_API_KEY: search_engine = "Jina" search_engines.append((search_engine, search_with_jina)) - search_engine = "Searxng" - search_engines.append((search_engine, search_with_searxng)) + if SEARXNG_URL: + search_engine = "Searxng" + search_engines.append((search_engine, search_with_searxng)) if send_status_func: subqueries_str = "\n- " + "\n- ".join(subqueries) @@ -253,9 +256,8 @@ async def search_with_firecrawl(query: str, location: LocationData) -> Tuple[str async def search_with_searxng(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]: """Search using local SearXNG instance.""" - # Use environment variable or default to localhost - searxng_url = 
os.getenv("KHOJ_SEARXNG_URL", "http://localhost:42113") - search_url = f"{searxng_url}/search" + # Use environment variable + search_url = f"{SEARXNG_URL}/search" country_code = location.country_code.lower() if location and location.country_code else "us" params = {"q": query, "format": "html", "language": "en", "country": country_code, "categories": "general"} @@ -264,7 +266,7 @@ async def search_with_searxng(query: str, location: LocationData) -> Tuple[str, try: async with session.get(search_url, params=params, timeout=WEBPAGE_REQUEST_TIMEOUT) as response: if response.status != 200: - logger.error(f"SearXNG search failed to call {searxng_url}: {await response.text()}") + logger.error(f"SearXNG search failed to call {SEARXNG_URL}: {await response.text()}") return query, {} html_content = await response.text() diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index c746455f..e2591ca8 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -803,11 +803,17 @@ def is_internet_connected(): def is_web_search_enabled(): """ Check if web search tool is enabled. - Set API key via env var for a supported search engine to enable it. + Set API key or provider URL via env var for a supported search engine to enable it. """ return any( - not is_none_or_empty(os.getenv(api_key)) - for api_key in ["GOOGLE_SEARCH_API_KEY", "SERPER_DEV_API_KEY", "JINA_API_KEY", "FIRECRAWL_API_KEY"] + not is_none_or_empty(os.getenv(search_config)) + for search_config in [ + "GOOGLE_SEARCH_API_KEY", + "SERPER_DEV_API_KEY", + "JINA_API_KEY", + "FIRECRAWL_API_KEY", + "KHOJ_SEARXNG_URL", + ] )