From ff73d30106c582472b6aed785520a2304c1e0ccc Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 18 Aug 2025 23:25:11 -0700 Subject: [PATCH] Fix max thinking budget for gemini models to generate final response --- src/khoj/processor/conversation/google/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/khoj/processor/conversation/google/utils.py b/src/khoj/processor/conversation/google/utils.py index d3776319..3a64434c 100644 --- a/src/khoj/processor/conversation/google/utils.py +++ b/src/khoj/processor/conversation/google/utils.py @@ -326,7 +326,7 @@ async def gemini_chat_completion_with_backoff( thinking_config = None if deepthought and is_reasoning_model(model_name): - thinking_config = gtypes.ThinkingConfig(thinking_budget=-1, include_thoughts=True) + thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True) max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI if is_reasoning_model(model_name):