mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Deterministically shuffle dataset for consistent data in a eval run
Previously eval run across modes would use different dataset shuffles. This change enables a strict apples to apples perf comparison of the different khoj modes across the same (random) subset of questions by using a dataset seed per workflow run to sample questions
This commit is contained in:
8
.github/workflows/run_evals.yml
vendored
8
.github/workflows/run_evals.yml
vendored
@@ -76,6 +76,11 @@ on:
|
||||
options:
|
||||
- 'false'
|
||||
- 'true'
|
||||
dataset_seed:
|
||||
description: 'Seed to deterministically shuffle questions'
|
||||
required: false
|
||||
default: ''
|
||||
type: string
|
||||
|
||||
jobs:
|
||||
eval:
|
||||
@@ -149,8 +154,9 @@ jobs:
|
||||
SAMPLE_SIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.sample_size || 200 }}
|
||||
BATCH_SIZE: "20"
|
||||
RANDOMIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.randomize || 'true' }}
|
||||
KHOJ_URL: "http://localhost:42110"
|
||||
DATASET_SEED: ${{ github.event_name == 'workflow_dispatch' && inputs.dataset_seed || github.run_id }}
|
||||
KHOJ_LLM_SEED: "42"
|
||||
KHOJ_URL: "http://localhost:42110"
|
||||
KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.5-flash' }}
|
||||
KHOJ_RESEARCH_ITERATIONS: ${{ github.event_name == 'workflow_dispatch' && inputs.max_research_iterations || 10 }}
|
||||
KHOJ_AUTO_READ_WEBPAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.auto_read_webpage || 'false' }}
|
||||
|
||||
Reference in New Issue
Block a user