mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Improve support for the new Kimi K2 Thinking model
Recognize reasoning output emitted by the Kimi K2 Thinking model inside <think> XML blocks
This commit is contained in:
@@ -142,6 +142,8 @@ def completion_with_backoff(
|
||||
else:
|
||||
updated_messages.append(message)
|
||||
formatted_messages = updated_messages
|
||||
elif "kimi-k2-thinking" in model_name.lower():
|
||||
stream_processor = in_stream_thought_processor
|
||||
elif is_qwen_style_reasoning_model(model_name, api_base_url):
|
||||
stream_processor = in_stream_thought_processor
|
||||
# Reasoning is enabled by default. Disable when deepthought is False.
|
||||
@@ -341,6 +343,8 @@ async def chat_completion_with_backoff(
|
||||
else:
|
||||
updated_messages.append(message)
|
||||
formatted_messages = updated_messages
|
||||
elif "kimi-k2-thinking" in model_name.lower():
|
||||
stream_processor = ain_stream_thought_processor
|
||||
elif is_qwen_style_reasoning_model(model_name, api_base_url):
|
||||
stream_processor = ain_stream_thought_processor
|
||||
# Reasoning is enabled by default. Disable when deepthought is False.
|
||||
|
||||
@@ -79,7 +79,10 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
||||
"grok-4": {"input": 3.0, "cache_read": 0.75, "output": 15.0},
|
||||
"grok-4-fast": {"input": 0.20, "cache_read": 0.05, "output": 0.50},
|
||||
# Groq pricing
|
||||
"moonshotai/kimi-k2-instruct": {"input": 1.00, "output": 3.00},
|
||||
"moonshotai/kimi-k2-instruct-0905": {"input": 1.00, "output": 3.00},
|
||||
"openai/gpt-oss-120b": {"input": 0.15, "output": 0.75},
|
||||
"openai/gpt-oss-20b": {"input": 0.10, "output": 0.50},
|
||||
# Miscellaneous
|
||||
# Moonshot AI, Baseten pricing for Kimi-K2-Thinking
|
||||
"moonshotai/kimi-k2-thinking": {"input": 0.60, "output": 2.50},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user