Improve support for the new Kimi K2 Thinking model

Recognize thoughts that the Kimi K2 Thinking model emits inside <think> XML blocks
2026-03-02 13:18:18 +00:00 · 2025-11-11 13:58:27 -08:00
parent 2c54a2cd10
commit 4556773f42
2 changed files with 8 additions and 1 deletion
--- a/src/khoj/processor/conversation/openai/utils.py
+++ b/src/khoj/processor/conversation/openai/utils.py
@@ -142,6 +142,8 @@ def completion_with_backoff(
            else:
                updated_messages.append(message)
        formatted_messages = updated_messages
+    elif "kimi-k2-thinking" in model_name.lower():
+        stream_processor = in_stream_thought_processor
    elif is_qwen_style_reasoning_model(model_name, api_base_url):
        stream_processor = in_stream_thought_processor
        # Reasoning is enabled by default. Disable when deepthought is False.
@@ -341,6 +343,8 @@ async def chat_completion_with_backoff(
            else:
                updated_messages.append(message)
        formatted_messages = updated_messages
+    elif "kimi-k2-thinking" in model_name.lower():
+        stream_processor = ain_stream_thought_processor
    elif is_qwen_style_reasoning_model(model_name, api_base_url):
        stream_processor = ain_stream_thought_processor
        # Reasoning is enabled by default. Disable when deepthought is False.
--- a/src/khoj/utils/constants.py
+++ b/src/khoj/utils/constants.py
@@ -79,7 +79,10 @@ model_to_cost: Dict[str, Dict[str, float]] = {
    "grok-4": {"input": 3.0, "cache_read": 0.75, "output": 15.0},
    "grok-4-fast": {"input": 0.20, "cache_read": 0.05, "output": 0.50},
    # Groq pricing
-    "moonshotai/kimi-k2-instruct": {"input": 1.00, "output": 3.00},
+    "moonshotai/kimi-k2-instruct-0905": {"input": 1.00, "output": 3.00},
    "openai/gpt-oss-120b": {"input": 0.15, "output": 0.75},
    "openai/gpt-oss-20b": {"input": 0.10, "output": 0.50},
+    # Miscellaneous
+    # Moonshot AI, Baseten pricing for Kimi-K2-Thinking
+    "moonshotai/kimi-k2-thinking": {"input": 0.60, "output": 2.50},
 }