mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-08 05:39:13 +00:00
Support deepthought in research mode with new Grok 3 reasoning model
Rely on the deepthought flag to control the reasoning effort (low/high) for the Grok model. This is different from the OpenAI reasoning models, which support low/medium/high and for which we use low/medium effort based on the deepthought flag. Note: Grok is accessible over an OpenAI-compatible API.
This commit is contained in:
@@ -68,6 +68,9 @@ def completion_with_backoff(
|
|||||||
temperature = 1
|
temperature = 1
|
||||||
reasoning_effort = "medium" if deepthought else "low"
|
reasoning_effort = "medium" if deepthought else "low"
|
||||||
model_kwargs["reasoning_effort"] = reasoning_effort
|
model_kwargs["reasoning_effort"] = reasoning_effort
|
||||||
|
elif is_twitter_reasoning_model(model_name, api_base_url):
|
||||||
|
reasoning_effort = "high" if deepthought else "low"
|
||||||
|
model_kwargs["reasoning_effort"] = reasoning_effort
|
||||||
|
|
||||||
model_kwargs["stream_options"] = {"include_usage": True}
|
model_kwargs["stream_options"] = {"include_usage": True}
|
||||||
if os.getenv("KHOJ_LLM_SEED"):
|
if os.getenv("KHOJ_LLM_SEED"):
|
||||||
@@ -181,6 +184,9 @@ def llm_thread(
|
|||||||
formatted_messages[first_system_message_index][
|
formatted_messages[first_system_message_index][
|
||||||
"content"
|
"content"
|
||||||
] = f"{first_system_message} Formatting re-enabled"
|
] = f"{first_system_message} Formatting re-enabled"
|
||||||
|
elif is_twitter_reasoning_model(model_name, api_base_url):
|
||||||
|
reasoning_effort = "high" if deepthought else "low"
|
||||||
|
model_kwargs["reasoning_effort"] = reasoning_effort
|
||||||
elif model_name.startswith("deepseek-reasoner"):
|
elif model_name.startswith("deepseek-reasoner"):
|
||||||
# Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
|
# Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
|
||||||
# The first message should always be a user message (except system message).
|
# The first message should always be a user message (except system message).
|
||||||
@@ -266,3 +272,14 @@ def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool
|
|||||||
Check if the model is an OpenAI reasoning model
|
Check if the model is an OpenAI reasoning model
|
||||||
"""
|
"""
|
||||||
return model_name.startswith("o") and (api_base_url is None or api_base_url.startswith("https://api.openai.com/v1"))
|
return model_name.startswith("o") and (api_base_url is None or api_base_url.startswith("https://api.openai.com/v1"))
|
||||||
|
|
||||||
|
|
||||||
|
def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
|
||||||
|
"""
|
||||||
|
Check if the model is a Twitter reasoning model
|
||||||
|
"""
|
||||||
|
return (
|
||||||
|
model_name.startswith("grok-3-mini")
|
||||||
|
and api_base_url is not None
|
||||||
|
and api_base_url.startswith("https://api.x.ai/v1")
|
||||||
|
)
|
||||||
|
|||||||
@@ -57,4 +57,9 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
|||||||
"claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
"claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||||
"claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
"claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||||
"claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
"claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||||
|
# Grok pricing: https://docs.x.ai/docs/models
|
||||||
|
"grok-3": {"input": 3.0, "output": 15.0},
|
||||||
|
"grok-3-latest": {"input": 3.0, "output": 15.0},
|
||||||
|
"grok-3-mini": {"input": 0.30, "output": 0.50},
|
||||||
|
"grok-3-mini-latest": {"input": 0.30, "output": 0.50},
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user