mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 21:19:12 +00:00
Support Google Gemini 3
- Use thinking level for Gemini 3 models instead of thinking budget.
- Bump the Google Gemini client library.
- Add default context window size and pricing for Gemini 3 models.
This commit is contained in:
@@ -203,8 +203,11 @@ def gemini_completion_with_backoff(
|
||||
response_schema = clean_response_schema(model_kwargs["response_schema"])
|
||||
|
||||
thinking_config = None
|
||||
if deepthought and is_reasoning_model(model_name):
|
||||
if deepthought and model_name.startswith("gemini-2.5"):
|
||||
thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)
|
||||
elif model_name.startswith("gemini-3"):
|
||||
thinking_level = gtypes.ThinkingLevel.HIGH if deepthought else gtypes.ThinkingLevel.LOW
|
||||
thinking_config = gtypes.ThinkingConfig(thinking_level=thinking_level, include_thoughts=True)
|
||||
|
||||
max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI
|
||||
if is_reasoning_model(model_name):
|
||||
@@ -321,8 +324,11 @@ async def gemini_chat_completion_with_backoff(
|
||||
formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
|
||||
|
||||
thinking_config = None
|
||||
if deepthought and is_reasoning_model(model_name):
|
||||
if deepthought and model_name.startswith("gemini-2.5"):
|
||||
thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)
|
||||
elif model_name.startswith("gemini-3"):
|
||||
thinking_level = gtypes.ThinkingLevel.HIGH if deepthought else gtypes.ThinkingLevel.LOW
|
||||
thinking_config = gtypes.ThinkingConfig(thinking_level=thinking_level, include_thoughts=True)
|
||||
|
||||
max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI
|
||||
if is_reasoning_model(model_name):
|
||||
@@ -563,7 +569,7 @@ def is_reasoning_model(model_name: str) -> bool:
|
||||
"""
|
||||
Check if the model is a reasoning model.
|
||||
"""
|
||||
return model_name.startswith("gemini-2.5")
|
||||
return model_name.startswith("gemini-2.5") or model_name.startswith("gemini-3")
|
||||
|
||||
|
||||
def to_gemini_tools(tools: List[ToolDefinition]) -> List[gtypes.ToolDict] | None:
|
||||
|
||||
@@ -70,6 +70,7 @@ model_to_prompt_size = {
|
||||
"gpt-5-mini-2025-08-07": 120000,
|
||||
"gpt-5-nano-2025-08-07": 120000,
|
||||
# Google Models
|
||||
"gemini-3-pro-preview": 120000,
|
||||
"gemini-2.5-flash": 120000,
|
||||
"gemini-2.5-flash-lite": 120000,
|
||||
"gemini-2.5-pro": 60000,
|
||||
|
||||
@@ -51,8 +51,9 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
||||
"gemini-2.0-flash": {"input": 0.10, "output": 0.40},
|
||||
"gemini-2.0-flash-lite": {"input": 0.0075, "output": 0.30},
|
||||
"gemini-2.5-flash-lite": {"input": 0.10, "output": 0.40},
|
||||
"gemini-2.5-flash": {"input": 0.30, "cache_read_tokens": 0.075, "output": 2.50},
|
||||
"gemini-2.5-pro": {"input": 1.25, "cache_read_tokens": 0.31, "output": 10.0},
|
||||
"gemini-2.5-flash": {"input": 0.30, "cache_read_tokens": 0.03, "output": 2.50},
|
||||
"gemini-2.5-pro": {"input": 1.25, "cache_read_tokens": 0.125, "output": 10.0},
|
||||
"gemini-3-pro-preview": {"input": 2.00, "cache_read_tokens": 0.20, "output": 12.0},
|
||||
# Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api
|
||||
"claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0, "cache_read": 0.08, "cache_write": 1.0},
|
||||
"claude-3-5-haiku@20241022": {"input": 1.0, "output": 5.0, "cache_read": 0.08, "cache_write": 1.0},
|
||||
|
||||
Reference in New Issue
Block a user