diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py
index 5f36b496..8b00cda8 100644
--- a/src/khoj/processor/conversation/openai/utils.py
+++ b/src/khoj/processor/conversation/openai/utils.py
@@ -145,11 +145,8 @@ def completion_with_backoff(
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
-    elif "gpt-oss" in model_name.lower():
-        model_kwargs["temperature"] = 1
-        reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning_effort"] = reasoning_effort
-        model_kwargs["top_p"] = 1.0
+    elif is_groq_api(api_base_url):
+        model_kwargs["service_tier"] = "auto"
 
     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
@@ -355,11 +352,8 @@ async def chat_completion_with_backoff(
         # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
         if not deepthought:
             add_qwen_no_think_tag(formatted_messages)
-    elif "gpt-oss" in model_name.lower():
-        temperature = 1
-        reasoning_effort = "medium" if deepthought else "low"
-        model_kwargs["reasoning_effort"] = reasoning_effort
-        model_kwargs["top_p"] = 1.0
+    elif is_groq_api(api_base_url):
+        model_kwargs["service_tier"] = "auto"
 
     read_timeout = 300 if is_local_api(api_base_url) else 60
     if os.getenv("KHOJ_LLM_SEED"):
@@ -854,8 +848,10 @@ def is_openai_reasoning_model(model_name: str, api_base_url: str = None) -> bool
     """
     Check if the model is an OpenAI reasoning model
     """
-    return is_openai_api(api_base_url) and (
-        model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5")
+    return (
+        is_openai_api(api_base_url)
+        and (model_name.lower().startswith("o") or model_name.lower().startswith("gpt-5"))
+        or model_name.lower().startswith("gpt-oss")
     )
 
 
@@ -879,6 +875,13 @@ def is_twitter_reasoning_model(model_name: str, api_base_url: str = None) -> boo
     )
 
 
+def is_groq_api(api_base_url: str = None) -> bool:
+    """
+    Check if the model is served over the Groq API
+    """
+    return api_base_url is not None and api_base_url.startswith("https://api.groq.com")
+
+
 def is_qwen_style_reasoning_model(model_name: str, api_base_url: str = None) -> bool:
     """
     Check if the model is a Qwen style reasoning model
diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py
index 0a55b165..e6381014 100644
--- a/src/khoj/utils/constants.py
+++ b/src/khoj/utils/constants.py
@@ -75,4 +75,5 @@ model_to_cost: Dict[str, Dict[str, float]] = {
     # Groq pricing
     "moonshotai/kimi-k2-instruct": {"input": 1.00, "output": 3.00},
     "openai/gpt-oss-120b": {"input": 0.15, "output": 0.75},
+    "openai/gpt-oss-20b": {"input": 0.10, "output": 0.50},
 }