From 29e5d7ef08aa21b96a8f5bec94a31f3f8b687a80 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Fri, 27 Jun 2025 17:53:46 -0700 Subject: [PATCH] Improve support for new Deepseek R1 model over Openai compatible api Parse thinking out from <think>..</think> tags in chat response Handle merging structured message content, not just str, for deepseek. --- src/khoj/processor/conversation/openai/utils.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index 253cdc25..440d3286 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -229,16 +229,22 @@ async def chat_completion_with_backoff( stream_processor = adeepseek_stream_processor reasoning_effort = "high" if deepthought else "low" model_kwargs["reasoning_effort"] = reasoning_effort - elif model_name.startswith("deepseek-reasoner"): - stream_processor = adeepseek_stream_processor + elif model_name.startswith("deepseek-reasoner") or "deepseek-r1" in model_name: + # Official Deepseek reasoner model returns structured thinking output. + # Deepseek r1 served via other AI model API providers return it in response stream + stream_processor = ain_stream_thought_processor if "deepseek-r1" in model_name else adeepseek_stream_processor # type: ignore[assignment] # Two successive messages cannot be from the same role. Should merge any back-to-back messages from the same role. # The first message should always be a user message (except system message). 
updated_messages: List[dict] = [] for i, message in enumerate(formatted_messages): if i > 0 and message["role"] == formatted_messages[i - 1]["role"]: - updated_messages[-1]["content"] += " " + message["content"] + updated_messages[-1]["content"] += ( + " " + message["content"] if isinstance(message["content"], str) else message["content"] + ) elif i == 1 and formatted_messages[i - 1]["role"] == "system" and message["role"] == "assistant": - updated_messages[-1]["content"] += " " + message["content"] + updated_messages[-1]["content"] += ( + " " + message["content"] if isinstance(message["content"], str) else message["content"] + ) else: updated_messages.append(message) formatted_messages = updated_messages