Support recently released OpenAI reasoning models

- Rely on deepthought flag to control reasoning effort
- Generalize the OpenAI reasoning model check to cover all o-series models
This commit is contained in:
Debanjum
2025-04-18 14:21:07 +05:30
parent 2f8283935a
commit 9c70a0f3f5
2 changed files with 17 additions and 6 deletions

View File

@@ -64,9 +64,10 @@ def completion_with_backoff(
formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
# Tune reasoning models arguments
if model_name.startswith("o1") or model_name.startswith("o3"):
if is_openai_reasoning_model(model_name, api_base_url):
temperature = 1
model_kwargs["reasoning_effort"] = "medium"
reasoning_effort = "medium" if deepthought else "low"
model_kwargs["reasoning_effort"] = reasoning_effort
model_kwargs["stream_options"] = {"include_usage": True}
if os.getenv("KHOJ_LLM_SEED"):
@@ -162,12 +163,13 @@ def llm_thread(
formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
# Tune reasoning models arguments
if model_name.startswith("o1") or model_name.startswith("o3"):
# Configure thinking for openai reasoning models
if is_openai_reasoning_model(model_name, api_base_url):
temperature = 1
model_kwargs["reasoning_effort"] = "medium"
reasoning_effort = "medium" if deepthought else "low"
model_kwargs["reasoning_effort"] = reasoning_effort
model_kwargs.pop("stop", None) # Remove unsupported stop param for reasoning models
if model_name.startswith("o3"):
# Get the first system message and add the string `Formatting re-enabled` to it.
# See https://platform.openai.com/docs/guides/reasoning-best-practices
if len(formatted_messages) > 0:
@@ -257,3 +259,10 @@ def get_openai_api_json_support(model_name: str, api_base_url: str = None) -> Js
if host == "api.deepinfra.com":
return JsonSupport.OBJECT
return JsonSupport.SCHEMA
def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
    """
    Check if the model is an OpenAI o-series reasoning model (e.g. o1, o3-mini, o4-mini).

    Args:
        model_name: Name of the chat model to check.
        api_base_url: Base URL of the API the model is served from.
            A missing (None) URL is treated the same as the official OpenAI API.

    Returns:
        True when the model name is "o" followed by a digit and the API is
        the official OpenAI API (or unspecified). False otherwise.
    """
    # Require a digit after the "o" prefix. A bare "o" prefix check would also
    # match unrelated model names such as "omni-moderation-latest".
    # Slicing with [1:2] keeps this safe for empty or single character names.
    is_o_series = model_name.startswith("o") and model_name[1:2].isdigit()
    is_openai_api = api_base_url is None or api_base_url.startswith("https://api.openai.com/v1")
    return is_o_series and is_openai_api

View File

@@ -39,8 +39,10 @@ model_to_cost: Dict[str, Dict[str, float]] = {
"gpt-4o": {"input": 2.50, "output": 10.00},
"gpt-4o-mini": {"input": 0.15, "output": 0.60},
"o1": {"input": 15.0, "output": 60.00},
"o3": {"input": 10.0, "output": 40.00},
"o1-mini": {"input": 3.0, "output": 12.0},
"o3-mini": {"input": 1.10, "output": 4.40},
"o4-mini": {"input": 1.10, "output": 4.40},
# Gemini Pricing: https://ai.google.dev/pricing
"gemini-1.5-flash": {"input": 0.075, "output": 0.30},
"gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},