Extract thoughts of OpenAI-style models like gpt-oss from API response

These models use delta.reasoning instead of delta.reasoning_content to
share model reasoning.
This commit is contained in:
Debanjum
2025-08-19 23:26:45 -07:00
parent f483a626b8
commit 83d725d2d8

View File

@@ -173,6 +173,13 @@ def completion_with_backoff(
and chunk.chunk.choices[0].delta.reasoning_content
):
thoughts += chunk.chunk.choices[0].delta.reasoning_content
elif (
chunk.type == "chunk"
and chunk.chunk.choices
and hasattr(chunk.chunk.choices[0].delta, "reasoning")
and chunk.chunk.choices[0].delta.reasoning
):
thoughts += chunk.chunk.choices[0].delta.reasoning
elif chunk.type == "chunk" and chunk.chunk.choices and chunk.chunk.choices[0].delta.tool_calls:
tool_ids += [tool_call.id for tool_call in chunk.chunk.choices[0].delta.tool_calls]
elif chunk.type == "tool_calls.function.arguments.done":
@@ -945,6 +952,14 @@ async def astream_thought_processor(
):
tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning_content
            # Handle OpenAI reasoning style response with thoughts. Used by gpt-oss.
if (
len(tchunk.choices) > 0
and hasattr(tchunk.choices[0].delta, "reasoning")
and tchunk.choices[0].delta.reasoning
):
tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning
            # Handle llama.cpp server style response with thoughts.
elif len(tchunk.choices) > 0 and tchunk.choices[0].delta.model_extra.get("reasoning_content"):
tchunk.choices[0].delta.thought = tchunk.choices[0].delta.model_extra.get("reasoning_content")