Eval more models, control randomization & auto read webpage via workflow

- Control auto read webpage via the eval workflow. Prefix the env var with KHOJ_. Default to false, as that is the default that will be used in prod going forward.
- Set the OpenAI API key via an input param in manual eval workflow runs.
- Simplify evaluating other chat models available over an OpenAI-compatible API via the eval workflow.
- Mask the input API key as a secret in the workflow.
- Discard unnecessary null setting of env vars.
- Control randomization of samples in the eval workflow. If randomization is turned off, it'll take the first SAMPLE_SIZE items from the eval dataset instead of a random collection of SAMPLE_SIZE items.
2026-03-02 13:18:18 +00:00 · 2025-04-03 18:00:25 +05:30
parent 911e1bf981
commit e9928d3c50
2 changed files with 34 additions and 7 deletions
--- a/.github/workflows/run_evals.yml
+++ b/.github/workflows/run_evals.yml
@@ -50,11 +50,32 @@ on:
        required: false
        default: 5
        type: number
+      openai_api_key:  # manual-run input; read from the event payload, masked and exported as OPENAI_API_KEY by the "Get App Version" step
+        description: 'OpenAI API key'
+        required: false
+        default: ''
+        type: string
      openai_base_url:
        description: 'Base URL of OpenAI compatible API'
        required: false
        default: ''
        type: string
+      auto_read_webpage:  # manual-run input; forwarded to the eval step as KHOJ_AUTO_READ_WEBPAGE
+        description: 'Auto read webpage on online search'
+        required: false
+        default: 'false'  # off by default, matching the default intended for prod (per the commit message)
+        type: choice  # NOTE(review): string choices rather than a boolean; presumably so 'false' survives the `&& ... ||` fallback expression - confirm
+        options:
+          - 'false'
+          - 'true'
+      randomize:  # manual-run input; forwarded to the eval step as RANDOMIZE
+        description: 'Randomize the sample of questions'
+        required: false
+        default: 'true'  # same value the eval step falls back to on non-dispatch runs
+        type: choice  # when 'false', the first SAMPLE_SIZE items are used instead of a random sample (per the commit message)
+        options:
+          - 'false'
+          - 'true'

 jobs:
  eval:
@@ -92,7 +113,14 @@ jobs:

      - name: Get App Version
        id: hatch
-        run: echo "version=$(pipx run hatch version)" >> $GITHUB_OUTPUT
+        run: |
+          # Mask the OpenAI API key input early; '// empty' yields "" instead of the literal "null" when the event has no such input
+          OPENAI_API_KEY=$(jq -r '.inputs.openai_api_key // empty' "$GITHUB_EVENT_PATH")
+          if [ -n "$OPENAI_API_KEY" ]; then echo "::add-mask::$OPENAI_API_KEY"; fi
+          echo "OPENAI_API_KEY=$OPENAI_API_KEY" >> "$GITHUB_ENV"
+
+          # Get app version from hatch and expose it as this step's `version` output
+          echo "version=$(pipx run hatch version)" >> "$GITHUB_OUTPUT"

      - name: ⏬️ Install Dependencies
        env:
@@ -115,13 +143,13 @@ jobs:
          KHOJ_MODE: ${{ matrix.khoj_mode }}
          SAMPLE_SIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.sample_size || 200 }}
          BATCH_SIZE: "20"
-          RANDOMIZE: "True"
+          RANDOMIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.randomize || 'true' }}
          KHOJ_URL: "http://localhost:42110"
-          KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }}
          KHOJ_LLM_SEED: "42"
+          KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }}
          KHOJ_RESEARCH_ITERATIONS: ${{ github.event_name == 'workflow_dispatch' && inputs.max_research_iterations || 5 }}
+          KHOJ_AUTO_READ_WEBPAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.auto_read_webpage || 'false' }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
-          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENAI_BASE_URL: ${{ github.event_name == 'workflow_dispatch' && inputs.openai_base_url || '' }}
          SERPER_DEV_API_KEY: ${{ matrix.dataset != 'math500' && secrets.SERPER_DEV_API_KEY || '' }}
          OLOSTEP_API_KEY: ${{ matrix.dataset != 'math500' && secrets.OLOSTEP_API_KEY || ''}}