mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Support Exa as web search provider
This commit is contained in:
@@ -45,6 +45,8 @@ FIRECRAWL_USE_LLM_EXTRACT = is_env_var_true("FIRECRAWL_USE_LLM_EXTRACT")
|
|||||||
|
|
||||||
SEARXNG_URL = os.getenv("KHOJ_SEARXNG_URL")
|
SEARXNG_URL = os.getenv("KHOJ_SEARXNG_URL")
|
||||||
|
|
||||||
|
EXA_API_KEY = os.getenv("EXA_API_KEY")
|
||||||
|
|
||||||
# Timeout for web search and webpage read HTTP requests
|
# Timeout for web search and webpage read HTTP requests
|
||||||
WEBPAGE_REQUEST_TIMEOUT = 60 # seconds
|
WEBPAGE_REQUEST_TIMEOUT = 60 # seconds
|
||||||
|
|
||||||
@@ -115,6 +117,9 @@ async def search_online(
|
|||||||
if FIRECRAWL_API_KEY:
|
if FIRECRAWL_API_KEY:
|
||||||
search_engine = "Firecrawl"
|
search_engine = "Firecrawl"
|
||||||
search_engines.append((search_engine, search_with_firecrawl))
|
search_engines.append((search_engine, search_with_firecrawl))
|
||||||
|
if EXA_API_KEY:
|
||||||
|
search_engine = "Exa"
|
||||||
|
search_engines.append((search_engine, search_with_exa))
|
||||||
if SEARXNG_URL:
|
if SEARXNG_URL:
|
||||||
search_engine = "Searxng"
|
search_engine = "Searxng"
|
||||||
search_engines.append((search_engine, search_with_searxng))
|
search_engines.append((search_engine, search_with_searxng))
|
||||||
@@ -182,6 +187,73 @@ async def search_online(
|
|||||||
yield response_dict
|
yield response_dict
|
||||||
|
|
||||||
|
|
||||||
|
async def search_with_exa(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
|
||||||
|
"""
|
||||||
|
Search using Exa API.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: The search query string
|
||||||
|
location: Location data for geolocation-based search
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple containing the original query and a dictionary of search results
|
||||||
|
"""
|
||||||
|
# Set up API endpoint and headers
|
||||||
|
exa_search_api_endpoint = "https://api.exa.ai/search"
|
||||||
|
headers = {"Content-Type": "application/json", "x-api-key": EXA_API_KEY}
|
||||||
|
|
||||||
|
# Prepare request payload
|
||||||
|
country_code = location.country_code.upper() if location and location.country_code else "US"
|
||||||
|
payload = {
|
||||||
|
"query": query,
|
||||||
|
"type": "auto",
|
||||||
|
"userLocation": country_code,
|
||||||
|
"numResults": 10,
|
||||||
|
"contents": {
|
||||||
|
"text": False,
|
||||||
|
"highlights": {
|
||||||
|
"numSentences": 5,
|
||||||
|
"highlightsPerUrl": 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
try:
|
||||||
|
async with session.post(
|
||||||
|
exa_search_api_endpoint, headers=headers, json=payload, timeout=WEBPAGE_REQUEST_TIMEOUT
|
||||||
|
) as response:
|
||||||
|
if response.status != 200:
|
||||||
|
error_text = await response.text()
|
||||||
|
logger.error(f"Exa search failed: {error_text}")
|
||||||
|
return query, {}
|
||||||
|
|
||||||
|
response_json = await response.json()
|
||||||
|
results = response_json.get("results", [])
|
||||||
|
|
||||||
|
if is_none_or_empty(results):
|
||||||
|
logger.error(f"Exa search failed: {response_json.get('warning', 'Unknown error')}")
|
||||||
|
return query, {}
|
||||||
|
|
||||||
|
# Transform Exa response to match the expected format
|
||||||
|
organic_results = []
|
||||||
|
for item in results:
|
||||||
|
organic_results.append(
|
||||||
|
{
|
||||||
|
"title": item["title"],
|
||||||
|
"link": item["url"],
|
||||||
|
"snippet": item["highlights"][0] if "highlights" in item else None,
|
||||||
|
"content": item.get("text", None),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return query, {"organic": organic_results}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error searching with Exa: {str(e)}")
|
||||||
|
return query, {}
|
||||||
|
|
||||||
|
|
||||||
async def search_with_firecrawl(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
|
async def search_with_firecrawl(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
|
||||||
"""
|
"""
|
||||||
Search using Firecrawl API.
|
Search using Firecrawl API.
|
||||||
|
|||||||
Reference in New Issue
Block a user