Extract thoughts of OpenAI-style models like gpt-oss from API response

These models use delta.reasoning instead of delta.reasoning_content to
share model reasoning.
This commit is contained in:
Debanjum
2025-08-19 23:26:45 -07:00
parent f483a626b8
commit 83d725d2d8

View File

@@ -173,6 +173,13 @@ def completion_with_backoff(
and chunk.chunk.choices[0].delta.reasoning_content
):
thoughts += chunk.chunk.choices[0].delta.reasoning_content
elif (
chunk.type == "chunk"
and chunk.chunk.choices
and hasattr(chunk.chunk.choices[0].delta, "reasoning")
and chunk.chunk.choices[0].delta.reasoning
):
thoughts += chunk.chunk.choices[0].delta.reasoning
elif chunk.type == "chunk" and chunk.chunk.choices and chunk.chunk.choices[0].delta.tool_calls:
tool_ids += [tool_call.id for tool_call in chunk.chunk.choices[0].delta.tool_calls]
elif chunk.type == "tool_calls.function.arguments.done":
@@ -945,6 +952,14 @@ async def astream_thought_processor(
):
tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning_content
            # Handle OpenAI reasoning style response with thoughts. Used by gpt-oss.
if (
len(tchunk.choices) > 0
and hasattr(tchunk.choices[0].delta, "reasoning")
and tchunk.choices[0].delta.reasoning
):
tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning
            # Handle llama.cpp server style response with thoughts.
elif len(tchunk.choices) > 0 and tchunk.choices[0].delta.model_extra.get("reasoning_content"):
tchunk.choices[0].delta.thought = tchunk.choices[0].delta.model_extra.get("reasoning_content")