diff --git a/.github/workflows/run_evals.yml b/.github/workflows/run_evals.yml index 6e6f7143..9544b7f3 100644 --- a/.github/workflows/run_evals.yml +++ b/.github/workflows/run_evals.yml @@ -147,7 +147,7 @@ jobs: echo "## Evaluation Summary of Khoj on ${{ matrix.dataset }} in ${{ matrix.khoj_mode }} mode" >> $GITHUB_STEP_SUMMARY echo "**$(head -n 1 *_evaluation_summary_*.txt)**" >> $GITHUB_STEP_SUMMARY echo "- Khoj Version: ${{ steps.hatch.outputs.version }}" >> $GITHUB_STEP_SUMMARY - echo "- Chat Model: Gemini 1.5 Flash 002" >> $GITHUB_STEP_SUMMARY + echo "- Chat Model: Gemini 2.0 Flash" >> $GITHUB_STEP_SUMMARY echo "\`\`\`" >> $GITHUB_STEP_SUMMARY tail -n +2 *_evaluation_summary_*.txt >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY diff --git a/documentation/docs/get-started/setup.mdx b/documentation/docs/get-started/setup.mdx index c6cdec42..fb8e9f4c 100644 --- a/documentation/docs/get-started/setup.mdx +++ b/documentation/docs/get-started/setup.mdx @@ -333,7 +333,7 @@ Using Ollama? See the [Ollama Integration](/advanced/ollama) section for more cu - Add your [Gemini API key](https://aistudio.google.com/app/apikey) - Give the configuration a friendly name like `Gemini`. Do not configure the API base url. 2. Create a new [chat model](http://localhost:42110/server/admin/database/chatmodel/add) - - Set the `chat-model` field to a [Google Gemini chat model](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models). Example: `gemini-1.5-flash`. + - Set the `chat-model` field to a [Google Gemini chat model](https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-models). Example: `gemini-2.0-flash`. - Set the `model-type` field to `Google`. - Set the `ai model api` field to the Gemini AI Model API you created in step 1. 
diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index 6fd95ccd..cb25258c 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -31,7 +31,7 @@ logger = logging.getLogger(__name__) def extract_questions_gemini( text, - model: Optional[str] = "gemini-1.5-flash", + model: Optional[str] = "gemini-2.0-flash", conversation_log={}, api_key=None, temperature=0, @@ -132,9 +132,9 @@ def gemini_send_message_to_model( model_kwargs = {} - # Sometimes, this causes unwanted behavior and terminates response early. Disable for now while it's flaky. - # if response_type == "json_object": - # model_kwargs["response_mime_type"] = "application/json" + # This caused unwanted behavior and terminated responses early for the gemini 1.5 series. Monitor for flakiness with the 2.0 series. + if response_type == "json_object" and model in ["gemini-2.0-flash"]: + model_kwargs["response_mime_type"] = "application/json" # Get Response from Gemini return gemini_completion_with_backoff( @@ -154,7 +154,7 @@ def converse_gemini( online_results: Optional[Dict[str, Dict]] = None, code_results: Optional[Dict[str, Dict]] = None, conversation_log={}, - model: Optional[str] = "gemini-1.5-flash", + model: Optional[str] = "gemini-2.0-flash", api_key: Optional[str] = None, temperature: float = 0.2, completion_func=None, diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py index b3ff1f97..74c06172 100644 --- a/src/khoj/utils/constants.py +++ b/src/khoj/utils/constants.py @@ -18,7 +18,7 @@ default_offline_chat_models = [ "bartowski/Qwen2.5-14B-Instruct-GGUF", ] default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"] -default_gemini_chat_models = ["gemini-1.5-flash", "gemini-1.5-pro"] +default_gemini_chat_models = ["gemini-2.0-flash", "gemini-1.5-pro"] default_anthropic_chat_models = ["claude-3-5-sonnet-20241022", "claude-3-5-haiku-20241022"] empty_config = 
{ @@ -46,6 +46,7 @@ model_to_cost: Dict[str, Dict[str, float]] = { "gemini-1.5-flash-002": {"input": 0.075, "output": 0.30}, "gemini-1.5-pro": {"input": 1.25, "output": 5.00}, "gemini-1.5-pro-002": {"input": 1.25, "output": 5.00}, + "gemini-2.0-flash": {"input": 0.10, "output": 0.40}, # Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api_ "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0}, "claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0}, diff --git a/tests/conftest.py b/tests/conftest.py index 1795b340..e5ab3a8e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -315,7 +315,7 @@ def chat_client_builder(search_config, user, index_content=True, require_auth=Fa if chat_provider == ChatModel.ModelType.OPENAI: online_chat_model = ChatModelFactory(name="gpt-4o-mini", model_type="openai") elif chat_provider == ChatModel.ModelType.GOOGLE: - online_chat_model = ChatModelFactory(name="gemini-1.5-flash", model_type="google") + online_chat_model = ChatModelFactory(name="gemini-2.0-flash", model_type="google") elif chat_provider == ChatModel.ModelType.ANTHROPIC: online_chat_model = ChatModelFactory(name="claude-3-5-haiku-20241022", model_type="anthropic") if online_chat_model: diff --git a/tests/evals/eval.py b/tests/evals/eval.py index a1749435..0c95996f 100644 --- a/tests/evals/eval.py +++ b/tests/evals/eval.py @@ -629,7 +629,7 @@ def main(): response_evaluator = evaluate_response_with_mcq_match elif args.dataset == "math500": response_evaluator = partial( - evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-1.5-flash-002") + evaluate_response_with_gemini, eval_model=os.getenv("GEMINI_EVAL_MODEL", "gemini-2.0-flash-001") ) elif args.dataset == "frames_ir": response_evaluator = evaluate_response_for_ir