diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index 50e2b73e..fc79325b 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -64,9 +64,10 @@ def completion_with_backoff( formatted_messages = [{"role": message.role, "content": message.content} for message in messages] # Tune reasoning models arguments - if model_name.startswith("o1") or model_name.startswith("o3"): + if is_openai_reasoning_model(model_name, api_base_url): temperature = 1 - model_kwargs["reasoning_effort"] = "medium" + reasoning_effort = "medium" if deepthought else "low" + model_kwargs["reasoning_effort"] = reasoning_effort model_kwargs["stream_options"] = {"include_usage": True} if os.getenv("KHOJ_LLM_SEED"): @@ -162,12 +163,13 @@ def llm_thread( formatted_messages = [{"role": message.role, "content": message.content} for message in messages] - # Tune reasoning models arguments - if model_name.startswith("o1") or model_name.startswith("o3"): + # Configure thinking for openai reasoning models + if is_openai_reasoning_model(model_name, api_base_url): temperature = 1 - model_kwargs["reasoning_effort"] = "medium" + reasoning_effort = "medium" if deepthought else "low" + model_kwargs["reasoning_effort"] = reasoning_effort + model_kwargs.pop("stop", None) # Remove unsupported stop param for reasoning models - if model_name.startswith("o3"): # Get the first system message and add the string `Formatting re-enabled` to it. 
def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
    """
    Check if the model is an OpenAI reasoning (o-series) model.

    A model qualifies when both conditions hold:
    - its name has the o-series shape: a lowercase "o" immediately followed
      by an ASCII digit (e.g. "o1", "o3-mini", "o4-mini"), and
    - it is served by OpenAI itself: `api_base_url` is None (the default
      endpoint) or starts with "https://api.openai.com/v1".

    Args:
        model_name: Name of the chat model as sent to the API.
        api_base_url: Base URL of the API endpoint, or None for the default.

    Returns:
        True if the model should be treated as an OpenAI reasoning model.
    """
    # A bare "o" prefix is too broad: it would also match other model ids that
    # merely begin with "o" (e.g. "omni-moderation-latest"). Require a digit
    # right after the "o". Slicing keeps this safe for empty/1-char names.
    version_char = model_name[1:2]
    has_reasoning_name = model_name[:1] == "o" and version_char.isascii() and version_char.isdigit()

    # OpenAI-compatible third-party hosts may serve models with similar names
    # but need not accept the reasoning-specific request parameters.
    is_openai_host = api_base_url is None or api_base_url.startswith("https://api.openai.com/v1")

    return has_reasoning_name and is_openai_host