Parse DeepSeek reasoning model thoughts served via OpenAI compatible API
DeepSeek reasoners return their reasoning in the reasoning_content field. Create an async stream processor to parse the reasoning out when using the deepseek reasoner model.
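
For context, when streamed over the OpenAI-compatible API, the DeepSeek reasoner interleaves its chain of thought and its answer across chunk deltas: thought text arrives in reasoning_content while the answer arrives in content. A rough sketch of two such deltas (surrounding fields and values are illustrative, not taken from this commit):

# Illustrative shape of two streamed deltas from deepseek-reasoner.
# Only the reasoning_content / content split is the point; values are made up.
thought_delta = {"choices": [{"delta": {"reasoning_content": "Compare the two dates first...", "content": None}}]}
answer_delta = {"choices": [{"delta": {"reasoning_content": None, "content": "The deadline is Tuesday."}}]}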
@@ -184,6 +184,7 @@ async def chat_completion_with_backoff(
         reasoning_effort = "high" if deepthought else "low"
         model_kwargs["reasoning_effort"] = reasoning_effort
     elif model_name.startswith("deepseek-reasoner"):
+        stream_processor = adeepseek_stream_processor
         # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role.
         # The first message should always be a user message (except system message).
         updated_messages: List[dict] = []
@@ -194,7 +195,6 @@ async def chat_completion_with_backoff(
                 updated_messages[-1]["content"] += " " + message["content"]
             else:
                 updated_messages.append(message)
-
         formatted_messages = updated_messages
     elif is_qwen_reasoning_model(model_name, api_base_url):
         stream_processor = partial(ain_stream_thought_processor, thought_tag="think")
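
The message-merging pass that these two hunks bracket is only partly visible in the diff (the loop header and the role check sit outside the shown context). A minimal self-contained sketch of such a merge pass, assuming a role-equality check drives the branch; only the append/concatenate lines are from the commit:

from typing import List

def merge_same_role_messages(messages: List[dict]) -> List[dict]:
    # Collapse back-to-back messages from the same role into one message,
    # so no two successive messages share a role. Sketch only: the loop and
    # condition are assumptions; the two branch bodies mirror the diff above.
    updated_messages: List[dict] = []
    for message in messages:
        if updated_messages and updated_messages[-1]["role"] == message["role"]:
            updated_messages[-1]["content"] += " " + message["content"]
        else:
            updated_messages.append(message)
    return updated_messages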
@@ -349,6 +349,19 @@ async def adefault_stream_processor(
         yield ChatCompletionWithThoughtsChunk.model_validate(chunk.model_dump())
 
 
+async def adeepseek_stream_processor(
+    chat_stream: openai.AsyncStream[ChatCompletionChunk],
+) -> AsyncGenerator[ChatCompletionWithThoughtsChunk, None]:
+    """
+    Async generator to cast and return chunks from the deepseek chat completions stream.
+    """
+    async for chunk in chat_stream:
+        tchunk = ChatCompletionWithThoughtsChunk.model_validate(chunk.model_dump())
+        if len(tchunk.choices) > 0 and tchunk.choices[0].delta.reasoning_content:
+            tchunk.choices[0].delta.thought = chunk.choices[0].delta.reasoning_content
+        yield tchunk
+
+
 def in_stream_thought_processor(
     chat_stream: openai.Stream[ChatCompletionChunk], thought_tag="think"
 ) -> Generator[ChatCompletionStreamWithThoughtEvent, None, None]:
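
A hedged sketch of how the new processor might be consumed end to end. The client setup, base URL, API key, and prompt are placeholders; only adeepseek_stream_processor and the delta.thought field come from this commit:

import asyncio
import openai

async def demo() -> None:
    # Placeholder client; any OpenAI-compatible endpoint serving deepseek-reasoner would do.
    client = openai.AsyncOpenAI(api_key="sk-placeholder", base_url="https://api.deepseek.com")
    raw_stream = await client.chat.completions.create(
        model="deepseek-reasoner",
        messages=[{"role": "user", "content": "Why is the sky blue?"}],
        stream=True,
    )
    # adeepseek_stream_processor is the coroutine added in this commit; assumed importable here.
    async for chunk in adeepseek_stream_processor(raw_stream):
        delta = chunk.choices[0].delta if chunk.choices else None
        if delta is not None and delta.thought:
            print("[thought]", delta.thought, flush=True)
        elif delta is not None and delta.content:
            print(delta.content, end="", flush=True)

asyncio.run(demo())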