mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 21:29:11 +00:00
Improve support for the new Kimi K2 Thinking model
Recognize the reasoning ("thinking") output that the Kimi K2 Thinking model emits inside <think> XML blocks
This commit is contained in:
@@ -142,6 +142,8 @@ def completion_with_backoff(
|
|||||||
else:
|
else:
|
||||||
updated_messages.append(message)
|
updated_messages.append(message)
|
||||||
formatted_messages = updated_messages
|
formatted_messages = updated_messages
|
||||||
|
elif "kimi-k2-thinking" in model_name.lower():
|
||||||
|
stream_processor = in_stream_thought_processor
|
||||||
elif is_qwen_style_reasoning_model(model_name, api_base_url):
|
elif is_qwen_style_reasoning_model(model_name, api_base_url):
|
||||||
stream_processor = in_stream_thought_processor
|
stream_processor = in_stream_thought_processor
|
||||||
# Reasoning is enabled by default. Disable when deepthought is False.
|
# Reasoning is enabled by default. Disable when deepthought is False.
|
||||||
@@ -341,6 +343,8 @@ async def chat_completion_with_backoff(
|
|||||||
else:
|
else:
|
||||||
updated_messages.append(message)
|
updated_messages.append(message)
|
||||||
formatted_messages = updated_messages
|
formatted_messages = updated_messages
|
||||||
|
elif "kimi-k2-thinking" in model_name.lower():
|
||||||
|
stream_processor = ain_stream_thought_processor
|
||||||
elif is_qwen_style_reasoning_model(model_name, api_base_url):
|
elif is_qwen_style_reasoning_model(model_name, api_base_url):
|
||||||
stream_processor = ain_stream_thought_processor
|
stream_processor = ain_stream_thought_processor
|
||||||
# Reasoning is enabled by default. Disable when deepthought is False.
|
# Reasoning is enabled by default. Disable when deepthought is False.
|
||||||
|
|||||||
@@ -79,7 +79,10 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
|||||||
"grok-4": {"input": 3.0, "cache_read": 0.75, "output": 15.0},
|
"grok-4": {"input": 3.0, "cache_read": 0.75, "output": 15.0},
|
||||||
"grok-4-fast": {"input": 0.20, "cache_read": 0.05, "output": 0.50},
|
"grok-4-fast": {"input": 0.20, "cache_read": 0.05, "output": 0.50},
|
||||||
# Groq pricing
|
# Groq pricing
|
||||||
"moonshotai/kimi-k2-instruct": {"input": 1.00, "output": 3.00},
|
"moonshotai/kimi-k2-instruct-0905": {"input": 1.00, "output": 3.00},
|
||||||
"openai/gpt-oss-120b": {"input": 0.15, "output": 0.75},
|
"openai/gpt-oss-120b": {"input": 0.15, "output": 0.75},
|
||||||
"openai/gpt-oss-20b": {"input": 0.10, "output": 0.50},
|
"openai/gpt-oss-20b": {"input": 0.10, "output": 0.50},
|
||||||
|
# Miscellaneous
|
||||||
|
# Moonshot AI, Baseten pricing for Kimi-K2-Thinking
|
||||||
|
"moonshotai/kimi-k2-thinking": {"input": 0.60, "output": 2.50},
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user