Add MATH500 dataset to eval

Evaluate simpler MATH500 responses with Gemini 1.5 Flash

This improves the speed and reduces the cost of running this eval
This commit is contained in:
Debanjum
2024-11-27 16:29:15 -08:00
parent 22aef9bf53
commit 29e801c381
2 changed files with 35 additions and 3 deletions

View File

@@ -26,6 +26,7 @@ on:
- frames
- simpleqa
- gpqa
- math500
sample_size:
description: 'Number of samples to evaluate'
required: false
@@ -96,8 +97,8 @@ jobs:
KHOJ_URL: "http://localhost:42110"
KHOJ_LLM_SEED: "42"
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
SERPER_DEV_API_KEY: ${{ secrets.SERPER_DEV_API_KEY }}
OLOSTEP_API_KEY: ${{ secrets.OLOSTEP_API_KEY }}
# Leave these API keys empty for math500 runs. `cond && secret` on its own
# evaluates to `false` when the dataset is math500, and that boolean is cast to
# the non-empty string "false" in the environment; the trailing `|| ''` makes
# the math500 case yield an empty string instead.
SERPER_DEV_API_KEY: ${{ matrix.dataset != 'math500' && secrets.SERPER_DEV_API_KEY || '' }}
OLOSTEP_API_KEY: ${{ matrix.dataset != 'math500' && secrets.OLOSTEP_API_KEY || '' }}
HF_TOKEN: ${{ secrets.HF_TOKEN }}
KHOJ_ADMIN_EMAIL: khoj
KHOJ_ADMIN_PASSWORD: khoj