Improve support for the new Kimi K2 Thinking model

Recognize the reasoning output of the Kimi K2 Thinking model when it is emitted inside <think> XML blocks
This commit is contained in:
Debanjum
2025-11-11 13:58:27 -08:00
parent 2c54a2cd10
commit 4556773f42
2 changed files with 8 additions and 1 deletions

View File

@@ -142,6 +142,8 @@ def completion_with_backoff(
else:
updated_messages.append(message)
formatted_messages = updated_messages
elif "kimi-k2-thinking" in model_name.lower():
stream_processor = in_stream_thought_processor
elif is_qwen_style_reasoning_model(model_name, api_base_url):
stream_processor = in_stream_thought_processor
# Reasoning is enabled by default. Disable when deepthought is False.
@@ -341,6 +343,8 @@ async def chat_completion_with_backoff(
else:
updated_messages.append(message)
formatted_messages = updated_messages
elif "kimi-k2-thinking" in model_name.lower():
stream_processor = ain_stream_thought_processor
elif is_qwen_style_reasoning_model(model_name, api_base_url):
stream_processor = ain_stream_thought_processor
# Reasoning is enabled by default. Disable when deepthought is False.

View File

@@ -79,7 +79,10 @@ model_to_cost: Dict[str, Dict[str, float]] = {
"grok-4": {"input": 3.0, "cache_read": 0.75, "output": 15.0},
"grok-4-fast": {"input": 0.20, "cache_read": 0.05, "output": 0.50},
# Groq pricing
"moonshotai/kimi-k2-instruct": {"input": 1.00, "output": 3.00},
"moonshotai/kimi-k2-instruct-0905": {"input": 1.00, "output": 3.00},
"openai/gpt-oss-120b": {"input": 0.15, "output": 0.75},
"openai/gpt-oss-20b": {"input": 0.10, "output": 0.50},
# Miscellaneous
# Moonshot AI, Baseten pricing for Kimi-K2-Thinking
"moonshotai/kimi-k2-thinking": {"input": 0.60, "output": 2.50},
}