Disable SimpleQA eval on release as saturated & low signal for usecase

Reaching >94% in research mode on SimpleQA. When answers can be
researched online, it becomes too easy. And the FRAMES eval does a
more thorough job of evaluating that use-case anyway.
This commit is contained in:
Debanjum
2025-03-22 08:05:12 +05:30
parent 45015dae27
commit 6cc5a10b09

View File

@@ -58,7 +58,7 @@ jobs:
matrix: matrix:
# Use input from manual trigger if available, else run all combinations # Use input from manual trigger if available, else run all combinations
khoj_mode: ${{ github.event_name == 'workflow_dispatch' && fromJSON(format('["{0}"]', inputs.khoj_mode)) || fromJSON('["general", "default", "research"]') }} khoj_mode: ${{ github.event_name == 'workflow_dispatch' && fromJSON(format('["{0}"]', inputs.khoj_mode)) || fromJSON('["general", "default", "research"]') }}
dataset: ${{ github.event_name == 'workflow_dispatch' && fromJSON(format('["{0}"]', inputs.dataset)) || fromJSON('["frames", "simpleqa", "gpqa"]') }} dataset: ${{ github.event_name == 'workflow_dispatch' && fromJSON(format('["{0}"]', inputs.dataset)) || fromJSON('["frames", "gpqa"]') }}
services: services:
postgres: postgres: