Tune reasoning effort, temperature, top_p for gpt-oss models

2026-04-20 01:24:31 +00:00 · 2025-08-19 23:29:42 -07:00
parent 83d725d2d8
commit dbc3330610
1 changed file with 10 additions and 0 deletions
--- a/src/khoj/processor/conversation/openai/utils.py
+++ b/src/khoj/processor/conversation/openai/utils.py
@@ -145,6 +145,11 @@ def completion_with_backoff(
        # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
        if not deepthought:
            add_qwen_no_think_tag(formatted_messages)
+    elif "gpt-oss" in model_name.lower():
+        model_kwargs["temperature"] = 1
+        reasoning_effort = "medium" if deepthought else "low"
+        model_kwargs["reasoning_effort"] = reasoning_effort
+        model_kwargs["top_p"] = 1.0

    read_timeout = 300 if is_local_api(api_base_url) else 60
    if os.getenv("KHOJ_LLM_SEED"):
@@ -346,6 +351,11 @@ async def chat_completion_with_backoff(
        # See https://qwenlm.github.io/blog/qwen3/#advanced-usages
        if not deepthought:
            add_qwen_no_think_tag(formatted_messages)
+    elif "gpt-oss" in model_name.lower():
+        temperature = 1
+        reasoning_effort = "medium" if deepthought else "low"
+        model_kwargs["reasoning_effort"] = reasoning_effort
+        model_kwargs["top_p"] = 1.0

    read_timeout = 300 if is_local_api(api_base_url) else 60
    if os.getenv("KHOJ_LLM_SEED"):