Eval more models, control randomization & auto read webpage via workflow

- Control auto read webpage via eval workflow. Prefix the env var with KHOJ_.
  Default to false, as that is the default that will be used in prod
  going forward.

- Set OpenAI API key via input param in manual eval workflow runs
  - Simplify evaluating other chat models available over an OpenAI
    compatible API via eval workflow.
  - Mask input API key as secret in workflow.
  - Discard unnecessary null setting of env vars.

- Control randomization of samples in eval workflow.
  If randomization is turned off, it'll take the first SAMPLE_SIZE
  items from the eval dataset instead of a random collection of
  SAMPLE_SIZE items.
This commit is contained in:
Debanjum
2025-04-03 18:00:25 +05:30
parent 911e1bf981
commit e9928d3c50
2 changed files with 34 additions and 7 deletions

View File

@@ -50,11 +50,32 @@ on:
required: false required: false
default: 5 default: 5
type: number type: number
openai_api_key:
description: 'OpenAI API key'
required: false
default: ''
type: string
openai_base_url: openai_base_url:
description: 'Base URL of OpenAI compatible API' description: 'Base URL of OpenAI compatible API'
required: false required: false
default: '' default: ''
type: string type: string
auto_read_webpage:
description: 'Auto read webpage on online search'
required: false
default: 'false'
type: choice
options:
- 'false'
- 'true'
randomize:
description: 'Randomize the sample of questions'
required: false
default: 'true'
type: choice
options:
- 'false'
- 'true'
jobs: jobs:
eval: eval:
@@ -92,7 +113,14 @@ jobs:
- name: Get App Version - name: Get App Version
id: hatch id: hatch
run: echo "version=$(pipx run hatch version)" >> $GITHUB_OUTPUT run: |
# Mask relevant workflow inputs as secret early
OPENAI_API_KEY=$(jq -r '.inputs.openai_api_key' $GITHUB_EVENT_PATH)
echo ::add-mask::$OPENAI_API_KEY
echo OPENAI_API_KEY="$OPENAI_API_KEY" >> $GITHUB_ENV
# Get app version from hatch
echo "version=$(pipx run hatch version)" >> $GITHUB_OUTPUT
- name: ⏬️ Install Dependencies - name: ⏬️ Install Dependencies
env: env:
@@ -115,13 +143,13 @@ jobs:
KHOJ_MODE: ${{ matrix.khoj_mode }} KHOJ_MODE: ${{ matrix.khoj_mode }}
SAMPLE_SIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.sample_size || 200 }} SAMPLE_SIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.sample_size || 200 }}
BATCH_SIZE: "20" BATCH_SIZE: "20"
RANDOMIZE: "True" RANDOMIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.randomize || 'true' }}
KHOJ_URL: "http://localhost:42110" KHOJ_URL: "http://localhost:42110"
KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }}
KHOJ_LLM_SEED: "42" KHOJ_LLM_SEED: "42"
KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }}
KHOJ_RESEARCH_ITERATIONS: ${{ github.event_name == 'workflow_dispatch' && inputs.max_research_iterations || 5 }} KHOJ_RESEARCH_ITERATIONS: ${{ github.event_name == 'workflow_dispatch' && inputs.max_research_iterations || 5 }}
KHOJ_AUTO_READ_WEBPAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.auto_read_webpage || 'false' }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENAI_BASE_URL: ${{ github.event_name == 'workflow_dispatch' && inputs.openai_base_url || '' }} OPENAI_BASE_URL: ${{ github.event_name == 'workflow_dispatch' && inputs.openai_base_url || '' }}
SERPER_DEV_API_KEY: ${{ matrix.dataset != 'math500' && secrets.SERPER_DEV_API_KEY || '' }} SERPER_DEV_API_KEY: ${{ matrix.dataset != 'math500' && secrets.SERPER_DEV_API_KEY || '' }}
OLOSTEP_API_KEY: ${{ matrix.dataset != 'math500' && secrets.OLOSTEP_API_KEY || ''}} OLOSTEP_API_KEY: ${{ matrix.dataset != 'math500' && secrets.OLOSTEP_API_KEY || ''}}

View File

@@ -2,7 +2,6 @@ import asyncio
import json import json
import logging import logging
import os import os
import urllib.parse
from collections import defaultdict from collections import defaultdict
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
@@ -33,7 +32,7 @@ logger = logging.getLogger(__name__)
GOOGLE_SEARCH_API_KEY = os.getenv("GOOGLE_SEARCH_API_KEY") GOOGLE_SEARCH_API_KEY = os.getenv("GOOGLE_SEARCH_API_KEY")
GOOGLE_SEARCH_ENGINE_ID = os.getenv("GOOGLE_SEARCH_ENGINE_ID") GOOGLE_SEARCH_ENGINE_ID = os.getenv("GOOGLE_SEARCH_ENGINE_ID")
SERPER_DEV_API_KEY = os.getenv("SERPER_DEV_API_KEY") SERPER_DEV_API_KEY = os.getenv("SERPER_DEV_API_KEY")
AUTO_READ_WEBPAGE = is_env_var_true("AUTO_READ_WEBPAGE") AUTO_READ_WEBPAGE = is_env_var_true("KHOJ_AUTO_READ_WEBPAGE")
SERPER_DEV_URL = "https://google.serper.dev/search" SERPER_DEV_URL = "https://google.serper.dev/search"
JINA_SEARCH_API_URL = "https://s.jina.ai/" JINA_SEARCH_API_URL = "https://s.jina.ai/"
@@ -113,7 +112,6 @@ async def search_online(
search_engine = "Searxng" search_engine = "Searxng"
search_engines.append((search_engine, search_with_searxng)) search_engines.append((search_engine, search_with_searxng))
logger.info(f"🌐 Searching the Internet for {subqueries}")
if send_status_func: if send_status_func:
subqueries_str = "\n- " + "\n- ".join(subqueries) subqueries_str = "\n- " + "\n- ".join(subqueries)
async for event in send_status_func(f"**Searching the Internet for**: {subqueries_str}"): async for event in send_status_func(f"**Searching the Internet for**: {subqueries_str}"):
@@ -121,6 +119,7 @@ async def search_online(
response_dict = {} response_dict = {}
for search_engine, search_func in search_engines: for search_engine, search_func in search_engines:
logger.info(f"🌐 Searching the Internet with {search_engine} for {subqueries}")
with timer(f"Internet searches with {search_engine} for {subqueries} took", logger): with timer(f"Internet searches with {search_engine} for {subqueries} took", logger):
try: try:
search_tasks = [search_func(subquery, location) for subquery in subqueries] search_tasks = [search_func(subquery, location) for subquery in subqueries]