mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 05:29:12 +00:00
Support Google Gemini 3

- Use thinking level for Gemini 3 models instead of thinking budget.
- Bump the Google Gemini library.
- Add default context size and pricing.
This commit is contained in:
@@ -87,7 +87,7 @@ dependencies = [
|
||||
"django_apscheduler == 0.7.0",
|
||||
"anthropic == 0.52.0",
|
||||
"docx2txt == 0.8",
|
||||
"google-genai == 1.11.0",
|
||||
"google-genai == 1.51.0",
|
||||
"google-auth ~= 2.23.3",
|
||||
"pyjson5 == 1.6.7",
|
||||
"resend == 1.0.1",
|
||||
|
||||
@@ -203,8 +203,11 @@ def gemini_completion_with_backoff(
|
||||
response_schema = clean_response_schema(model_kwargs["response_schema"])
|
||||
|
||||
thinking_config = None
|
||||
if deepthought and is_reasoning_model(model_name):
|
||||
if deepthought and model_name.startswith("gemini-2.5"):
|
||||
thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)
|
||||
elif model_name.startswith("gemini-3"):
|
||||
thinking_level = gtypes.ThinkingLevel.HIGH if deepthought else gtypes.ThinkingLevel.LOW
|
||||
thinking_config = gtypes.ThinkingConfig(thinking_level=thinking_level, include_thoughts=True)
|
||||
|
||||
max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI
|
||||
if is_reasoning_model(model_name):
|
||||
@@ -321,8 +324,11 @@ async def gemini_chat_completion_with_backoff(
|
||||
formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
|
||||
|
||||
thinking_config = None
|
||||
if deepthought and is_reasoning_model(model_name):
|
||||
if deepthought and model_name.startswith("gemini-2.5"):
|
||||
thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)
|
||||
elif model_name.startswith("gemini-3"):
|
||||
thinking_level = gtypes.ThinkingLevel.HIGH if deepthought else gtypes.ThinkingLevel.LOW
|
||||
thinking_config = gtypes.ThinkingConfig(thinking_level=thinking_level, include_thoughts=True)
|
||||
|
||||
max_output_tokens = MAX_OUTPUT_TOKENS_FOR_STANDARD_GEMINI
|
||||
if is_reasoning_model(model_name):
|
||||
@@ -563,7 +569,7 @@ def is_reasoning_model(model_name: str) -> bool:
|
||||
"""
|
||||
Check if the model is a reasoning model.
|
||||
"""
|
||||
return model_name.startswith("gemini-2.5")
|
||||
return model_name.startswith("gemini-2.5") or model_name.startswith("gemini-3")
|
||||
|
||||
|
||||
def to_gemini_tools(tools: List[ToolDefinition]) -> List[gtypes.ToolDict] | None:
|
||||
|
||||
@@ -70,6 +70,7 @@ model_to_prompt_size = {
|
||||
"gpt-5-mini-2025-08-07": 120000,
|
||||
"gpt-5-nano-2025-08-07": 120000,
|
||||
# Google Models
|
||||
"gemini-3-pro-preview": 120000,
|
||||
"gemini-2.5-flash": 120000,
|
||||
"gemini-2.5-flash-lite": 120000,
|
||||
"gemini-2.5-pro": 60000,
|
||||
|
||||
@@ -51,8 +51,9 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
||||
"gemini-2.0-flash": {"input": 0.10, "output": 0.40},
|
||||
"gemini-2.0-flash-lite": {"input": 0.0075, "output": 0.30},
|
||||
"gemini-2.5-flash-lite": {"input": 0.10, "output": 0.40},
|
||||
"gemini-2.5-flash": {"input": 0.30, "cache_read_tokens": 0.075, "output": 2.50},
|
||||
"gemini-2.5-pro": {"input": 1.25, "cache_read_tokens": 0.31, "output": 10.0},
|
||||
"gemini-2.5-flash": {"input": 0.30, "cache_read_tokens": 0.03, "output": 2.50},
|
||||
"gemini-2.5-pro": {"input": 1.25, "cache_read_tokens": 0.125, "output": 10.0},
|
||||
"gemini-3-pro-preview": {"input": 2.00, "cache_read_tokens": 0.20, "output": 12.0},
|
||||
# Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api
|
||||
"claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0, "cache_read": 0.08, "cache_write": 1.0},
|
||||
"claude-3-5-haiku@20241022": {"input": 1.0, "output": 5.0, "cache_read": 0.08, "cache_write": 1.0},
|
||||
|
||||
15
uv.lock
generated
15
uv.lock
generated
@@ -871,7 +871,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "google-genai"
|
||||
version = "1.11.0"
|
||||
version = "1.51.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
@@ -879,12 +879,13 @@ dependencies = [
|
||||
{ name = "httpx" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "requests" },
|
||||
{ name = "tenacity" },
|
||||
{ name = "typing-extensions" },
|
||||
{ name = "websockets" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/73/44/64c6c23724580add879cbcca81ffed500955c1c21850468cd4dcf9c62a03/google_genai-1.11.0.tar.gz", hash = "sha256:0643b2f5373fbeae945d0cd5a37d157eab0c172bb5e14e905f2f8d45aa51cabb", size = 160955, upload-time = "2025-04-16T23:34:37.979Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/c3/1c/29245699c7c274ed5709b33b6a5192af2d57da5da3d2f189f222d1895336/google_genai-1.51.0.tar.gz", hash = "sha256:596c1ec964b70fec17a6ccfe6ee4edede31022584e8b1d33371d93037c4001b1", size = 258060, upload-time = "2025-11-18T05:32:47.068Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/dc/9b/55f97203720cbda5a1c8e0460793914980e41c6ca4859fea735dd66d2c3a/google_genai-1.11.0-py3-none-any.whl", hash = "sha256:34fbe3c85419adbcddcb8222f99514596b3a69c80ff1a4ae30a01a763da27acc", size = 159687, upload-time = "2025-04-16T23:34:36.595Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c6/28/0185dcda66f1994171067cfdb0e44a166450239d5b11b3a8a281dd2da459/google_genai-1.51.0-py3-none-any.whl", hash = "sha256:bfb7d0c6ba48ba9bda539f0d5e69dad827d8735a8b1e4703bafa0a2945d293e1", size = 260483, upload-time = "2025-11-18T05:32:45.938Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -1311,7 +1312,7 @@ requires-dist = [
|
||||
{ name = "freezegun", marker = "extra == 'dev'", specifier = ">=1.2.0" },
|
||||
{ name = "gitpython", marker = "extra == 'dev'", specifier = "~=3.1.43" },
|
||||
{ name = "google-auth", specifier = "~=2.23.3" },
|
||||
{ name = "google-genai", specifier = "==1.11.0" },
|
||||
{ name = "google-genai", specifier = "==1.51.0" },
|
||||
{ name = "gunicorn", marker = "extra == 'dev'", specifier = "==22.0.0" },
|
||||
{ name = "gunicorn", marker = "extra == 'prod'", specifier = "==22.0.0" },
|
||||
{ name = "httpx", specifier = "==0.28.1" },
|
||||
@@ -2027,7 +2028,7 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "openai"
|
||||
version = "2.7.2"
|
||||
version = "2.8.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
@@ -2039,9 +2040,9 @@ dependencies = [
|
||||
{ name = "tqdm" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/71/e3/cec27fa28ef36c4ccea71e9e8c20be9b8539618732989a82027575aab9d4/openai-2.7.2.tar.gz", hash = "sha256:082ef61163074d8efad0035dd08934cf5e3afd37254f70fc9165dd6a8c67dcbd", size = 595732, upload-time = "2025-11-10T16:42:31.108Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/d5/e4/42591e356f1d53c568418dc7e30dcda7be31dd5a4d570bca22acb0525862/openai-2.8.1.tar.gz", hash = "sha256:cb1b79eef6e809f6da326a7ef6038719e35aa944c42d081807bfa1be8060f15f", size = 602490, upload-time = "2025-11-17T22:39:59.549Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/25/66/22cfe4b695b5fd042931b32c67d685e867bfd169ebf46036b95b57314c33/openai-2.7.2-py3-none-any.whl", hash = "sha256:116f522f4427f8a0a59b51655a356da85ce092f3ed6abeca65f03c8be6e073d9", size = 1008375, upload-time = "2025-11-10T16:42:28.574Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/55/4f/dbc0c124c40cb390508a82770fb9f6e3ed162560181a85089191a851c59a/openai-2.8.1-py3-none-any.whl", hash = "sha256:c6c3b5a04994734386e8dad3c00a393f56d3b68a27cd2e8acae91a59e4122463", size = 1022688, upload-time = "2025-11-17T22:39:57.675Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
||||
Reference in New Issue
Block a user