From dbc333061012c28590eecf5e559f69f7d2861439 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Tue, 19 Aug 2025 23:29:42 -0700 Subject: [PATCH] Tune reasoning effort, temp, top_p for gpt-oss models --- src/khoj/processor/conversation/openai/utils.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index 1d0ecdab..f67ec2c5 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -145,6 +145,11 @@ def completion_with_backoff( # See https://qwenlm.github.io/blog/qwen3/#advanced-usages if not deepthought: add_qwen_no_think_tag(formatted_messages) + elif "gpt-oss" in model_name.lower(): + model_kwargs["temperature"] = 1 + reasoning_effort = "medium" if deepthought else "low" + model_kwargs["reasoning_effort"] = reasoning_effort + model_kwargs["top_p"] = 1.0 read_timeout = 300 if is_local_api(api_base_url) else 60 if os.getenv("KHOJ_LLM_SEED"): @@ -346,6 +351,11 @@ async def chat_completion_with_backoff( # See https://qwenlm.github.io/blog/qwen3/#advanced-usages if not deepthought: add_qwen_no_think_tag(formatted_messages) + elif "gpt-oss" in model_name.lower(): + temperature = 1 + reasoning_effort = "medium" if deepthought else "low" + model_kwargs["reasoning_effort"] = reasoning_effort + model_kwargs["top_p"] = 1.0 read_timeout = 300 if is_local_api(api_base_url) else 60 if os.getenv("KHOJ_LLM_SEED"):