mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 13:25:11 +00:00
Default to gemini 2.5 model series on init and for eval
This commit is contained in:
6
.github/workflows/run_evals.yml
vendored
6
.github/workflows/run_evals.yml
vendored
@@ -43,7 +43,7 @@ on:
|
|||||||
chat_model:
|
chat_model:
|
||||||
description: 'Chat model to use'
|
description: 'Chat model to use'
|
||||||
required: false
|
required: false
|
||||||
default: 'gemini-2.0-flash'
|
default: 'gemini-2.5-flash'
|
||||||
type: string
|
type: string
|
||||||
max_research_iterations:
|
max_research_iterations:
|
||||||
description: 'Maximum number of iterations in research mode'
|
description: 'Maximum number of iterations in research mode'
|
||||||
@@ -151,7 +151,7 @@ jobs:
|
|||||||
RANDOMIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.randomize || 'true' }}
|
RANDOMIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.randomize || 'true' }}
|
||||||
KHOJ_URL: "http://localhost:42110"
|
KHOJ_URL: "http://localhost:42110"
|
||||||
KHOJ_LLM_SEED: "42"
|
KHOJ_LLM_SEED: "42"
|
||||||
KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }}
|
KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.5-flash' }}
|
||||||
KHOJ_RESEARCH_ITERATIONS: ${{ github.event_name == 'workflow_dispatch' && inputs.max_research_iterations || 10 }}
|
KHOJ_RESEARCH_ITERATIONS: ${{ github.event_name == 'workflow_dispatch' && inputs.max_research_iterations || 10 }}
|
||||||
KHOJ_AUTO_READ_WEBPAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.auto_read_webpage || 'false' }}
|
KHOJ_AUTO_READ_WEBPAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.auto_read_webpage || 'false' }}
|
||||||
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
|
||||||
@@ -224,7 +224,7 @@ jobs:
|
|||||||
echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
|
echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
|
echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
|
echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- Chat Model: ${{ inputs.chat_model || 'gemini-2.0-flash' }}" >> $GITHUB_STEP_SUMMARY
|
echo "- Chat Model: ${{ inputs.chat_model || 'gemini-2.5-flash' }}" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "- Code Sandbox: ${{ inputs.sandbox || 'terrarium' }}" >> $GITHUB_STEP_SUMMARY
|
echo "- Code Sandbox: ${{ inputs.sandbox || 'terrarium' }}" >> $GITHUB_STEP_SUMMARY
|
||||||
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
|
echo "\`\`\`" >> $GITHUB_STEP_SUMMARY
|
||||||
tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
|
tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY
|
||||||
|
|||||||
@@ -279,7 +279,7 @@ Using Ollama? See the [Ollama Integration](/advanced/ollama) section for more cu
|
|||||||
- Add your [Gemini API key](https://aistudio.google.com/app/apikey)
|
- Add your [Gemini API key](https://aistudio.google.com/app/apikey)
|
||||||
- Give the configuration a friendly name like `Gemini`. Do not configure the API base url.
|
- Give the configuration a friendly name like `Gemini`. Do not configure the API base url.
|
||||||
2. Create a new [chat model](http://localhost:42110/server/admin/database/chatmodel/add)
|
2. Create a new [chat model](http://localhost:42110/server/admin/database/chatmodel/add)
|
||||||
- Set the `chat-model` field to a [Google Gemini chat model](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models). Example: `gemini-2.0-flash`.
|
- Set the `chat-model` field to a [Google Gemini chat model](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models). Example: `gemini-2.5-flash`.
|
||||||
- Set the `model-type` field to `Google`.
|
- Set the `model-type` field to `Google`.
|
||||||
- Set the `ai model api` field to the Gemini AI Model API you created in step 1.
|
- Set the `ai model api` field to the Gemini AI Model API you created in step 1.
|
||||||
|
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ async def converse_gemini(
|
|||||||
user_name: str = None,
|
user_name: str = None,
|
||||||
chat_history: List[ChatMessageModel] = [],
|
chat_history: List[ChatMessageModel] = [],
|
||||||
# Model
|
# Model
|
||||||
model: Optional[str] = "gemini-2.0-flash",
|
model: Optional[str] = "gemini-2.5-flash",
|
||||||
api_key: Optional[str] = None,
|
api_key: Optional[str] = None,
|
||||||
api_base_url: Optional[str] = None,
|
api_base_url: Optional[str] = None,
|
||||||
temperature: float = 1.0,
|
temperature: float = 1.0,
|
||||||
|
|||||||
@@ -73,6 +73,7 @@ model_to_prompt_size = {
|
|||||||
"gpt-5-nano-2025-08-07": 120000,
|
"gpt-5-nano-2025-08-07": 120000,
|
||||||
# Google Models
|
# Google Models
|
||||||
"gemini-2.5-flash": 120000,
|
"gemini-2.5-flash": 120000,
|
||||||
|
"gemini-2.5-flash-lite": 120000,
|
||||||
"gemini-2.5-pro": 60000,
|
"gemini-2.5-pro": 60000,
|
||||||
"gemini-2.0-flash": 120000,
|
"gemini-2.0-flash": 120000,
|
||||||
"gemini-2.0-flash-lite": 120000,
|
"gemini-2.0-flash-lite": 120000,
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ app_env_filepath = "~/.khoj/env"
|
|||||||
telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
|
telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
|
||||||
content_directory = "~/.khoj/content/"
|
content_directory = "~/.khoj/content/"
|
||||||
default_openai_chat_models = ["gpt-4o-mini", "gpt-4.1", "o3", "o4-mini"]
|
default_openai_chat_models = ["gpt-4o-mini", "gpt-4.1", "o3", "o4-mini"]
|
||||||
default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05"]
|
default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash", "gemini-2.5-pro", "gemini-2.5-flash-lite"]
|
||||||
default_anthropic_chat_models = ["claude-sonnet-4-0", "claude-3-5-haiku-latest"]
|
default_anthropic_chat_models = ["claude-sonnet-4-0", "claude-3-5-haiku-latest"]
|
||||||
|
|
||||||
empty_config = {
|
empty_config = {
|
||||||
@@ -50,6 +50,7 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
|||||||
"gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
|
"gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
|
||||||
"gemini-2.0-flash": {"input": 0.10, "output": 0.40},
|
"gemini-2.0-flash": {"input": 0.10, "output": 0.40},
|
||||||
"gemini-2.0-flash-lite": {"input": 0.0075, "output": 0.30},
|
"gemini-2.0-flash-lite": {"input": 0.0075, "output": 0.30},
|
||||||
|
"gemini-2.5-flash-lite": {"input": 0.10, "output": 0.40},
|
||||||
"gemini-2.5-flash": {"input": 0.30, "output": 2.50},
|
"gemini-2.5-flash": {"input": 0.30, "output": 2.50},
|
||||||
"gemini-2.5-pro": {"input": 1.25, "output": 10.0},
|
"gemini-2.5-pro": {"input": 1.25, "output": 10.0},
|
||||||
# Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api
|
# Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api
|
||||||
|
|||||||
@@ -246,7 +246,7 @@ def chat_client_builder(search_config, user, index_content=True, require_auth=Fa
|
|||||||
if chat_provider == ChatModel.ModelType.OPENAI:
|
if chat_provider == ChatModel.ModelType.OPENAI:
|
||||||
online_chat_model = ChatModelFactory(name="gpt-4o-mini", model_type="openai")
|
online_chat_model = ChatModelFactory(name="gpt-4o-mini", model_type="openai")
|
||||||
elif chat_provider == ChatModel.ModelType.GOOGLE:
|
elif chat_provider == ChatModel.ModelType.GOOGLE:
|
||||||
online_chat_model = ChatModelFactory(name="gemini-2.0-flash", model_type="google")
|
online_chat_model = ChatModelFactory(name="gemini-2.5-flash", model_type="google")
|
||||||
elif chat_provider == ChatModel.ModelType.ANTHROPIC:
|
elif chat_provider == ChatModel.ModelType.ANTHROPIC:
|
||||||
online_chat_model = ChatModelFactory(name="claude-3-5-haiku-20241022", model_type="anthropic")
|
online_chat_model = ChatModelFactory(name="claude-3-5-haiku-20241022", model_type="anthropic")
|
||||||
if online_chat_model:
|
if online_chat_model:
|
||||||
@@ -355,7 +355,7 @@ End of file {i}.
|
|||||||
if chat_provider == ChatModel.ModelType.OPENAI:
|
if chat_provider == ChatModel.ModelType.OPENAI:
|
||||||
online_chat_model = ChatModelFactory(name="gpt-4o-mini", model_type="openai")
|
online_chat_model = ChatModelFactory(name="gpt-4o-mini", model_type="openai")
|
||||||
elif chat_provider == ChatModel.ModelType.GOOGLE:
|
elif chat_provider == ChatModel.ModelType.GOOGLE:
|
||||||
online_chat_model = ChatModelFactory(name="gemini-2.0-flash", model_type="google")
|
online_chat_model = ChatModelFactory(name="gemini-2.5-flash", model_type="google")
|
||||||
elif chat_provider == ChatModel.ModelType.ANTHROPIC:
|
elif chat_provider == ChatModel.ModelType.ANTHROPIC:
|
||||||
online_chat_model = ChatModelFactory(name="claude-3-5-haiku-20241022", model_type="anthropic")
|
online_chat_model = ChatModelFactory(name="claude-3-5-haiku-20241022", model_type="anthropic")
|
||||||
|
|
||||||
|
|||||||
@@ -34,10 +34,10 @@ logger = logging.getLogger(__name__)
|
|||||||
KHOJ_URL = os.getenv("KHOJ_URL", "http://localhost:42110")
|
KHOJ_URL = os.getenv("KHOJ_URL", "http://localhost:42110")
|
||||||
KHOJ_CHAT_API_URL = f"{KHOJ_URL}/api/chat"
|
KHOJ_CHAT_API_URL = f"{KHOJ_URL}/api/chat"
|
||||||
KHOJ_API_KEY = os.getenv("KHOJ_API_KEY")
|
KHOJ_API_KEY = os.getenv("KHOJ_API_KEY")
|
||||||
KHOJ_MODE = os.getenv("KHOJ_MODE", "default").lower() # E.g research, general, notes etc.
|
KHOJ_MODE = os.getenv("KHOJ_MODE", "default").lower() # E.g research, general, default etc.
|
||||||
|
|
||||||
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
||||||
GEMINI_EVAL_MODEL = os.getenv("GEMINI_EVAL_MODEL", "gemini-2.0-flash-001")
|
GEMINI_EVAL_MODEL = os.getenv("GEMINI_EVAL_MODEL", "gemini-2.5-flash")
|
||||||
|
|
||||||
LLM_SEED = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
|
LLM_SEED = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
|
||||||
SAMPLE_SIZE = os.getenv("SAMPLE_SIZE") # Number of examples to evaluate
|
SAMPLE_SIZE = os.getenv("SAMPLE_SIZE") # Number of examples to evaluate
|
||||||
@@ -636,7 +636,7 @@ def main():
|
|||||||
response_evaluator = evaluate_response_with_mcq_match
|
response_evaluator = evaluate_response_with_mcq_match
|
||||||
elif args.dataset == "math500":
|
elif args.dataset == "math500":
|
||||||
response_evaluator = partial(
|
response_evaluator = partial(
|
||||||
evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-2.0-flash-001")
|
evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-2.5-flash-lite")
|
||||||
)
|
)
|
||||||
elif args.dataset == "frames_ir":
|
elif args.dataset == "frames_ir":
|
||||||
response_evaluator = evaluate_response_for_ir
|
response_evaluator = evaluate_response_for_ir
|
||||||
@@ -696,7 +696,7 @@ def main():
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
"""
|
"""
|
||||||
Evaluate Khoj on supported benchmarks.
|
Evaluate Khoj on supported benchmarks.
|
||||||
Response are evaluated by GEMINI_EVAL_MODEL (default: gemini-pro-1.5-002).
|
Responses are evaluated by GEMINI_EVAL_MODEL (default: gemini-2.5-flash).
|
||||||
|
|
||||||
Khoj should be running at KHOJ_URL (default: http://localhost:42110).
|
Khoj should be running at KHOJ_URL (default: http://localhost:42110).
|
||||||
The Gemini judge model is accessed via the Gemini API with your GEMINI_API_KEY.
|
The Gemini judge model is accessed via the Gemini API with your GEMINI_API_KEY.
|
||||||
|
|||||||
@@ -232,7 +232,7 @@ class ChatModelFactory(factory.django.DjangoModelFactory):
|
|||||||
|
|
||||||
max_prompt_size = 20000
|
max_prompt_size = 20000
|
||||||
tokenizer = None
|
tokenizer = None
|
||||||
name = "gemini-2.0-flash"
|
name = "gemini-2.5-flash"
|
||||||
model_type = get_chat_provider()
|
model_type = get_chat_provider()
|
||||||
ai_model_api = factory.LazyAttribute(lambda obj: AiModelApiFactory() if get_chat_api_key() else None)
|
ai_model_api = factory.LazyAttribute(lambda obj: AiModelApiFactory() if get_chat_api_key() else None)
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ def create_test_automation(client: TestClient) -> str:
|
|||||||
"""Helper function to create a test automation and return its ID."""
|
"""Helper function to create a test automation and return its ID."""
|
||||||
state.anonymous_mode = True
|
state.anonymous_mode = True
|
||||||
ChatModelFactory(
|
ChatModelFactory(
|
||||||
name="gemini-2.0-flash", model_type="google", ai_model_api=AiModelApiFactory(api_key=get_chat_api_key("google"))
|
name="gemini-2.5-flash", model_type="google", ai_model_api=AiModelApiFactory(api_key=get_chat_api_key("google"))
|
||||||
)
|
)
|
||||||
params = {
|
params = {
|
||||||
"q": "test automation",
|
"q": "test automation",
|
||||||
@@ -37,7 +37,7 @@ def test_create_automation(client: TestClient):
|
|||||||
# Arrange
|
# Arrange
|
||||||
state.anonymous_mode = True
|
state.anonymous_mode = True
|
||||||
ChatModelFactory(
|
ChatModelFactory(
|
||||||
name="gemini-2.0-flash", model_type="google", ai_model_api=AiModelApiFactory(api_key=get_chat_api_key("google"))
|
name="gemini-2.5-flash", model_type="google", ai_model_api=AiModelApiFactory(api_key=get_chat_api_key("google"))
|
||||||
)
|
)
|
||||||
params = {
|
params = {
|
||||||
"q": "test automation",
|
"q": "test automation",
|
||||||
|
|||||||
Reference in New Issue
Block a user