mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-04 21:29:12 +00:00
Eval more models, control randomization & auto read webpage via workflow
- Control auto read webpage via eval workflow. Prefix env var with KHOJ_.
Defaults to false, as that is the default that is going to be used in prod
going forward.
- Set openai api key via input param in manual eval workflow runs
- Simplify evaluating other chat models available over an OpenAI-compatible
API via the eval workflow.
- Mask input api key as secret in workflow.
- Discard unnecessary null setting of env vars.
- Control randomization of samples in eval workflow.
If randomization is turned off, it'll take the first SAMPLE_SIZE
items from the eval dataset instead of a random collection of
SAMPLE_SIZE items.
This commit is contained in:
@@ -2,7 +2,6 @@ import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import urllib.parse
|
||||
from collections import defaultdict
|
||||
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
|
||||
|
||||
@@ -33,7 +32,7 @@ logger = logging.getLogger(__name__)
|
||||
GOOGLE_SEARCH_API_KEY = os.getenv("GOOGLE_SEARCH_API_KEY")
|
||||
GOOGLE_SEARCH_ENGINE_ID = os.getenv("GOOGLE_SEARCH_ENGINE_ID")
|
||||
SERPER_DEV_API_KEY = os.getenv("SERPER_DEV_API_KEY")
|
||||
AUTO_READ_WEBPAGE = is_env_var_true("AUTO_READ_WEBPAGE")
|
||||
AUTO_READ_WEBPAGE = is_env_var_true("KHOJ_AUTO_READ_WEBPAGE")
|
||||
SERPER_DEV_URL = "https://google.serper.dev/search"
|
||||
|
||||
JINA_SEARCH_API_URL = "https://s.jina.ai/"
|
||||
@@ -113,7 +112,6 @@ async def search_online(
|
||||
search_engine = "Searxng"
|
||||
search_engines.append((search_engine, search_with_searxng))
|
||||
|
||||
logger.info(f"🌐 Searching the Internet for {subqueries}")
|
||||
if send_status_func:
|
||||
subqueries_str = "\n- " + "\n- ".join(subqueries)
|
||||
async for event in send_status_func(f"**Searching the Internet for**: {subqueries_str}"):
|
||||
@@ -121,6 +119,7 @@ async def search_online(
|
||||
|
||||
response_dict = {}
|
||||
for search_engine, search_func in search_engines:
|
||||
logger.info(f"🌐 Searching the Internet with {search_engine} for {subqueries}")
|
||||
with timer(f"Internet searches with {search_engine} for {subqueries} took", logger):
|
||||
try:
|
||||
search_tasks = [search_func(subquery, location) for subquery in subqueries]
|
||||
|
||||
Reference in New Issue
Block a user