mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-10 13:26:13 +00:00
Disable web search tool if no search engine configured by admin
Webpage read is gated behind having a web search engine configured for now. It can later be decoupled from web search and instead depend on whether any web scraper is configured.
This commit is contained in:
@@ -121,6 +121,7 @@ from khoj.utils.helpers import (
|
|||||||
is_none_or_empty,
|
is_none_or_empty,
|
||||||
is_operator_enabled,
|
is_operator_enabled,
|
||||||
is_valid_url,
|
is_valid_url,
|
||||||
|
is_web_search_enabled,
|
||||||
log_telemetry,
|
log_telemetry,
|
||||||
mode_descriptions_for_llm,
|
mode_descriptions_for_llm,
|
||||||
timer,
|
timer,
|
||||||
@@ -367,6 +368,8 @@ async def aget_data_sources_and_output_format(
|
|||||||
continue
|
continue
|
||||||
if source == ConversationCommand.Operator and not is_operator_enabled():
|
if source == ConversationCommand.Operator and not is_operator_enabled():
|
||||||
continue
|
continue
|
||||||
|
if source in [ConversationCommand.Online, ConversationCommand.Webpage] and not is_web_search_enabled():
|
||||||
|
continue
|
||||||
source_options[source.value] = description
|
source_options[source.value] = description
|
||||||
if len(agent_sources) == 0 or source.value in agent_sources:
|
if len(agent_sources) == 0 or source.value in agent_sources:
|
||||||
source_options_str += f'- "{source.value}": "{description}"\n'
|
source_options_str += f'- "{source.value}": "{description}"\n'
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ from khoj.utils.helpers import (
|
|||||||
dict_to_tuple,
|
dict_to_tuple,
|
||||||
is_none_or_empty,
|
is_none_or_empty,
|
||||||
is_operator_enabled,
|
is_operator_enabled,
|
||||||
|
is_web_search_enabled,
|
||||||
timer,
|
timer,
|
||||||
tools_for_research_llm,
|
tools_for_research_llm,
|
||||||
truncate_code_context,
|
truncate_code_context,
|
||||||
@@ -96,6 +97,7 @@ async def apick_next_tool(
|
|||||||
ConversationCommand.ViewFile,
|
ConversationCommand.ViewFile,
|
||||||
ConversationCommand.ListFiles,
|
ConversationCommand.ListFiles,
|
||||||
]
|
]
|
||||||
|
web_research_tools = [ConversationCommand.SearchWeb, ConversationCommand.ReadWebpage]
|
||||||
input_tools_to_research_tools = {
|
input_tools_to_research_tools = {
|
||||||
ConversationCommand.Notes.value: [tool.value for tool in document_research_tools],
|
ConversationCommand.Notes.value: [tool.value for tool in document_research_tools],
|
||||||
ConversationCommand.Webpage.value: [ConversationCommand.ReadWebpage.value],
|
ConversationCommand.Webpage.value: [ConversationCommand.ReadWebpage.value],
|
||||||
@@ -115,6 +117,9 @@ async def apick_next_tool(
|
|||||||
# Skip showing document related tools if user has no documents
|
# Skip showing document related tools if user has no documents
|
||||||
if tool in document_research_tools and not user_has_entries:
|
if tool in document_research_tools and not user_has_entries:
|
||||||
continue
|
continue
|
||||||
|
# Skip showing web search tool if agent has no access to internet
|
||||||
|
if tool in web_research_tools and not is_web_search_enabled():
|
||||||
|
continue
|
||||||
if tool == ConversationCommand.SemanticSearchFiles:
|
if tool == ConversationCommand.SemanticSearchFiles:
|
||||||
description = tool_data.description.format(max_search_queries=max_document_searches)
|
description = tool_data.description.format(max_search_queries=max_document_searches)
|
||||||
elif tool == ConversationCommand.ReadWebpage:
|
elif tool == ConversationCommand.ReadWebpage:
|
||||||
|
|||||||
@@ -794,6 +794,17 @@ def is_internet_connected():
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def is_web_search_enabled():
    """
    Report whether the web search tool should be made available.

    The tool counts as enabled as soon as any one of the API key environment
    variables listed below holds a value that `is_none_or_empty` does not
    consider empty. Set one of them to turn web search on.
    """
    api_key_env_vars = ("GOOGLE_SEARCH_API_KEY", "SERPER_DEV_API_KEY", "JINA_API_KEY", "FIRECRAWL_API_KEY")
    for env_var in api_key_env_vars:
        configured_value = os.getenv(env_var)
        # Stop at the first configured key; one provider is enough.
        if not is_none_or_empty(configured_value):
            return True
    return False
|
||||||
|
|
||||||
|
|
||||||
def is_internal_url(url: str) -> bool:
|
def is_internal_url(url: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if a URL is likely to be internal/non-public.
|
Check if a URL is likely to be internal/non-public.
|
||||||
|
|||||||
Reference in New Issue
Block a user