diff --git a/.github/workflows/run_evals.yml b/.github/workflows/run_evals.yml index 801b4e44..c7fbd6c7 100644 --- a/.github/workflows/run_evals.yml +++ b/.github/workflows/run_evals.yml @@ -50,11 +50,32 @@ on: required: false default: 5 type: number + openai_api_key: + description: 'OpenAI API key' + required: false + default: '' + type: string openai_base_url: description: 'Base URL of OpenAI compatible API' required: false default: '' type: string + auto_read_webpage: + description: 'Auto read webpage on online search' + required: false + default: 'false' + type: choice + options: + - 'false' + - 'true' + randomize: + description: 'Randomize the sample of questions' + required: false + default: 'true' + type: choice + options: + - 'false' + - 'true' jobs: eval: @@ -92,7 +113,14 @@ jobs: - name: Get App Version id: hatch - run: echo "version=$(pipx run hatch version)" >> $GITHUB_OUTPUT + run: | + # Mask relevant workflow inputs as secret early + OPENAI_API_KEY=$(jq -r '.inputs.openai_api_key' $GITHUB_EVENT_PATH) + echo ::add-mask::$OPENAI_API_KEY + echo OPENAI_API_KEY="$OPENAI_API_KEY" >> $GITHUB_ENV + + # Get app version from hatch + echo "version=$(pipx run hatch version)" >> $GITHUB_OUTPUT - name: ⏬️ Install Dependencies env: @@ -115,13 +143,13 @@ jobs: KHOJ_MODE: ${{ matrix.khoj_mode }} SAMPLE_SIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.sample_size || 200 }} BATCH_SIZE: "20" - RANDOMIZE: "True" + RANDOMIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.randomize || 'true' }} KHOJ_URL: "http://localhost:42110" - KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }} KHOJ_LLM_SEED: "42" + KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }} KHOJ_RESEARCH_ITERATIONS: ${{ github.event_name == 'workflow_dispatch' && inputs.max_research_iterations || 5 }} + KHOJ_AUTO_READ_WEBPAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.auto_read_webpage || 'false' }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENAI_BASE_URL: ${{ github.event_name == 'workflow_dispatch' && inputs.openai_base_url || '' }} SERPER_DEV_API_KEY: ${{ matrix.dataset != 'math500' && secrets.SERPER_DEV_API_KEY || '' }} OLOSTEP_API_KEY: ${{ matrix.dataset != 'math500' && secrets.OLOSTEP_API_KEY || ''}} diff --git a/src/khoj/processor/tools/online_search.py b/src/khoj/processor/tools/online_search.py index e1476e40..0564b65c 100644 --- a/src/khoj/processor/tools/online_search.py +++ b/src/khoj/processor/tools/online_search.py @@ -2,7 +2,6 @@ import asyncio import json import logging import os -import urllib.parse from collections import defaultdict from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union @@ -33,7 +32,7 @@ logger = logging.getLogger(__name__) GOOGLE_SEARCH_API_KEY = os.getenv("GOOGLE_SEARCH_API_KEY") GOOGLE_SEARCH_ENGINE_ID = os.getenv("GOOGLE_SEARCH_ENGINE_ID") SERPER_DEV_API_KEY = os.getenv("SERPER_DEV_API_KEY") -AUTO_READ_WEBPAGE = is_env_var_true("AUTO_READ_WEBPAGE") +AUTO_READ_WEBPAGE = is_env_var_true("KHOJ_AUTO_READ_WEBPAGE") SERPER_DEV_URL = "https://google.serper.dev/search" JINA_SEARCH_API_URL = "https://s.jina.ai/" @@ -113,7 +112,6 @@ async def search_online( search_engine = "Searxng" search_engines.append((search_engine, search_with_searxng)) - logger.info(f"🌐 Searching the Internet for {subqueries}") if send_status_func: subqueries_str = "\n- " + "\n- ".join(subqueries) async for event in send_status_func(f"**Searching the Internet for**: {subqueries_str}"): @@ -121,6 +119,7 @@ async def search_online( response_dict = {} for search_engine, search_func in search_engines: + logger.info(f"🌐 Searching the Internet with {search_engine} for {subqueries}") with timer(f"Internet searches with {search_engine} for {subqueries} took", logger): try: search_tasks = [search_func(subquery, location) for subquery in subqueries]