diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index f0048baf..45485f18 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -142,6 +142,8 @@ def completion_with_backoff( else: updated_messages.append(message) formatted_messages = updated_messages + elif "kimi-k2-thinking" in model_name.lower(): + stream_processor = in_stream_thought_processor elif is_qwen_style_reasoning_model(model_name, api_base_url): stream_processor = in_stream_thought_processor # Reasoning is enabled by default. Disable when deepthought is False. @@ -341,6 +343,8 @@ async def chat_completion_with_backoff( else: updated_messages.append(message) formatted_messages = updated_messages + elif "kimi-k2-thinking" in model_name.lower(): + stream_processor = ain_stream_thought_processor elif is_qwen_style_reasoning_model(model_name, api_base_url): stream_processor = ain_stream_thought_processor # Reasoning is enabled by default. Disable when deepthought is False. diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py index 354e9bcf..d283c0b1 100644 --- a/src/khoj/utils/constants.py +++ b/src/khoj/utils/constants.py @@ -79,7 +79,10 @@ model_to_cost: Dict[str, Dict[str, float]] = { "grok-4": {"input": 3.0, "cache_read": 0.75, "output": 15.0}, "grok-4-fast": {"input": 0.20, "cache_read": 0.05, "output": 0.50}, # Groq pricing - "moonshotai/kimi-k2-instruct": {"input": 1.00, "output": 3.00}, + "moonshotai/kimi-k2-instruct-0905": {"input": 1.00, "output": 3.00}, "openai/gpt-oss-120b": {"input": 0.15, "output": 0.75}, "openai/gpt-oss-20b": {"input": 0.10, "output": 0.50}, + # Miscellaneous + # Moonshot AI, Baseten pricing for Kimi-K2-Thinking + "moonshotai/kimi-k2-thinking": {"input": 0.60, "output": 2.50}, }