diff --git a/.github/workflows/run_evals.yml b/.github/workflows/run_evals.yml index 9d63d74a..6ee7b5ab 100644 --- a/.github/workflows/run_evals.yml +++ b/.github/workflows/run_evals.yml @@ -43,7 +43,7 @@ on: chat_model: description: 'Chat model to use' required: false - default: 'gemini-2.0-flash' + default: 'gemini-2.5-flash' type: string max_research_iterations: description: 'Maximum number of iterations in research mode' @@ -151,7 +151,7 @@ jobs: RANDOMIZE: ${{ github.event_name == 'workflow_dispatch' && inputs.randomize || 'true' }} KHOJ_URL: "http://localhost:42110" KHOJ_LLM_SEED: "42" - KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.0-flash' }} + KHOJ_DEFAULT_CHAT_MODEL: ${{ github.event_name == 'workflow_dispatch' && inputs.chat_model || 'gemini-2.5-flash' }} KHOJ_RESEARCH_ITERATIONS: ${{ github.event_name == 'workflow_dispatch' && inputs.max_research_iterations || 10 }} KHOJ_AUTO_READ_WEBPAGE: ${{ github.event_name == 'workflow_dispatch' && inputs.auto_read_webpage || 'false' }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} @@ -224,7 +224,7 @@ jobs: echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY - echo "- Chat Model: ${{ inputs.chat_model || 'gemini-2.0-flash' }}" >> $GITHUB_STEP_SUMMARY + echo "- Chat Model: ${{ inputs.chat_model || 'gemini-2.5-flash' }}" >> $GITHUB_STEP_SUMMARY echo "- Code Sandbox: ${{ inputs.sandbox || 'terrarium' }}" >> $GITHUB_STEP_SUMMARY echo "\`\`\`" >> $GITHUB_STEP_SUMMARY tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY diff --git a/documentation/docs/get-started/setup.mdx b/documentation/docs/get-started/setup.mdx index 01d60496..02f0eada 100644 --- a/documentation/docs/get-started/setup.mdx +++ 
b/documentation/docs/get-started/setup.mdx @@ -279,7 +279,7 @@ Using Ollama? See the [Ollama Integration](/advanced/ollama) section for more cu - Add your [Gemini API key](https://aistudio.google.com/app/apikey) - Give the configuration a friendly name like `Gemini`. Do not configure the API base url. 2. Create a new [chat model](http://localhost:42110/server/admin/database/chatmodel/add) - - Set the `chat-model` field to a [Google Gemini chat model](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models). Example: `gemini-2.0-flash`. + - Set the `chat-model` field to a [Google Gemini chat model](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models). Example: `gemini-2.5-flash`. - Set the `model-type` field to `Google`. - Set the `ai model api` field to the Gemini AI Model API you created in step 1. diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index e6923fad..e9e7736c 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -76,7 +76,7 @@ async def converse_gemini( user_name: str = None, chat_history: List[ChatMessageModel] = [], # Model - model: Optional[str] = "gemini-2.0-flash", + model: Optional[str] = "gemini-2.5-flash", api_key: Optional[str] = None, api_base_url: Optional[str] = None, temperature: float = 1.0, diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 82a0baab..270ca23b 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -73,6 +73,7 @@ model_to_prompt_size = { "gpt-5-nano-2025-08-07": 120000, # Google Models "gemini-2.5-flash": 120000, + "gemini-2.5-flash-lite": 120000, "gemini-2.5-pro": 60000, "gemini-2.0-flash": 120000, "gemini-2.0-flash-lite": 120000, diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py index 
c04b7c5b..0a55b165 100644 --- a/src/khoj/utils/constants.py +++ b/src/khoj/utils/constants.py @@ -11,7 +11,7 @@ app_env_filepath = "~/.khoj/env" telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry" content_directory = "~/.khoj/content/" default_openai_chat_models = ["gpt-4o-mini", "gpt-4.1", "o3", "o4-mini"] -default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05"] +default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash", "gemini-2.5-pro", "gemini-2.5-flash-lite"] default_anthropic_chat_models = ["claude-sonnet-4-0", "claude-3-5-haiku-latest"] empty_config = { @@ -50,6 +50,7 @@ model_to_cost: Dict[str, Dict[str, float]] = { "gemini-1.5-pro-002": {"input": 1.25, "output": 5.00}, "gemini-2.0-flash": {"input": 0.10, "output": 0.40}, "gemini-2.0-flash-lite": {"input": 0.0075, "output": 0.30}, + "gemini-2.5-flash-lite": {"input": 0.10, "output": 0.40}, "gemini-2.5-flash": {"input": 0.30, "output": 2.50}, "gemini-2.5-pro": {"input": 1.25, "output": 10.0}, # Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api diff --git a/tests/conftest.py b/tests/conftest.py index 1b33e94d..c8af0c93 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -246,7 +246,7 @@ def chat_client_builder(search_config, user, index_content=True, require_auth=Fa if chat_provider == ChatModel.ModelType.OPENAI: online_chat_model = ChatModelFactory(name="gpt-4o-mini", model_type="openai") elif chat_provider == ChatModel.ModelType.GOOGLE: - online_chat_model = ChatModelFactory(name="gemini-2.0-flash", model_type="google") + online_chat_model = ChatModelFactory(name="gemini-2.5-flash", model_type="google") elif chat_provider == ChatModel.ModelType.ANTHROPIC: online_chat_model = ChatModelFactory(name="claude-3-5-haiku-20241022", model_type="anthropic") if online_chat_model: @@ -355,7 +355,7 @@ End of file {i}. 
if chat_provider == ChatModel.ModelType.OPENAI: online_chat_model = ChatModelFactory(name="gpt-4o-mini", model_type="openai") elif chat_provider == ChatModel.ModelType.GOOGLE: - online_chat_model = ChatModelFactory(name="gemini-2.0-flash", model_type="google") + online_chat_model = ChatModelFactory(name="gemini-2.5-flash", model_type="google") elif chat_provider == ChatModel.ModelType.ANTHROPIC: online_chat_model = ChatModelFactory(name="claude-3-5-haiku-20241022", model_type="anthropic") diff --git a/tests/evals/eval.py b/tests/evals/eval.py index 85188d2e..8121ddfc 100644 --- a/tests/evals/eval.py +++ b/tests/evals/eval.py @@ -34,10 +34,10 @@ logger = logging.getLogger(__name__) KHOJ_URL = os.getenv("KHOJ_URL", "http://localhost:42110") KHOJ_CHAT_API_URL = f"{KHOJ_URL}/api/chat" KHOJ_API_KEY = os.getenv("KHOJ_API_KEY") -KHOJ_MODE = os.getenv("KHOJ_MODE", "default").lower() # E.g research, general, notes etc. +KHOJ_MODE = os.getenv("KHOJ_MODE", "default").lower() # E.g research, general, default etc. GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") -GEMINI_EVAL_MODEL = os.getenv("GEMINI_EVAL_MODEL", "gemini-2.0-flash-001") +GEMINI_EVAL_MODEL = os.getenv("GEMINI_EVAL_MODEL", "gemini-2.5-flash") LLM_SEED = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None SAMPLE_SIZE = os.getenv("SAMPLE_SIZE") # Number of examples to evaluate @@ -636,7 +636,7 @@ def main(): response_evaluator = evaluate_response_with_mcq_match elif args.dataset == "math500": response_evaluator = partial( - evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-2.0-flash-001") + evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-2.5-flash-lite") ) elif args.dataset == "frames_ir": response_evaluator = evaluate_response_for_ir @@ -696,7 +696,7 @@ def main(): if __name__ == "__main__": """ Evaluate Khoj on supported benchmarks. - Response are evaluated by GEMINI_EVAL_MODEL (default: gemini-pro-1.5-002). 
+ Responses are evaluated by GEMINI_EVAL_MODEL (default: gemini-2.5-flash). Khoj should be running at KHOJ_URL (default: http://localhost:42110). The Gemini judge model is accessed via the Gemini API with your GEMINI_API_KEY. diff --git a/tests/helpers.py b/tests/helpers.py index 6edb0946..985ba00a 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -232,7 +232,7 @@ class ChatModelFactory(factory.django.DjangoModelFactory): max_prompt_size = 20000 tokenizer = None - name = "gemini-2.0-flash" + name = "gemini-2.5-flash" model_type = get_chat_provider() ai_model_api = factory.LazyAttribute(lambda obj: AiModelApiFactory() if get_chat_api_key() else None) diff --git a/tests/test_api_automation.py b/tests/test_api_automation.py index c993cb23..b05d0e63 100644 --- a/tests/test_api_automation.py +++ b/tests/test_api_automation.py @@ -20,7 +20,7 @@ def create_test_automation(client: TestClient) -> str: """Helper function to create a test automation and return its ID.""" state.anonymous_mode = True ChatModelFactory( - name="gemini-2.0-flash", model_type="google", ai_model_api=AiModelApiFactory(api_key=get_chat_api_key("google")) + name="gemini-2.5-flash", model_type="google", ai_model_api=AiModelApiFactory(api_key=get_chat_api_key("google")) ) params = { "q": "test automation", @@ -37,7 +37,7 @@ def test_create_automation(client: TestClient): # Arrange state.anonymous_mode = True ChatModelFactory( - name="gemini-2.0-flash", model_type="google", ai_model_api=AiModelApiFactory(api_key=get_chat_api_key("google")) + name="gemini-2.5-flash", model_type="google", ai_model_api=AiModelApiFactory(api_key=get_chat_api_key("google")) ) params = { "q": "test automation",