diff --git a/pyproject.toml b/pyproject.toml index 5093fee7..d0e06809 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,7 @@ dependencies = [ "pytz ~= 2024.1", "cron-descriptor == 1.4.3", "django_apscheduler == 0.6.2", - "anthropic == 0.26.1", + "anthropic == 0.49.0", "docx2txt == 0.8", "google-generativeai == 0.8.3", "pyjson5 == 1.6.7", diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py index 4b1a2bd8..8b16ac02 100644 --- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py +++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py @@ -31,7 +31,7 @@ logger = logging.getLogger(__name__) def extract_questions_anthropic( text, - model: Optional[str] = "claude-instant-1.2", + model: Optional[str] = "claude-3-7-sonnet-latest", conversation_log={}, api_key=None, temperature=0.7, @@ -122,7 +122,7 @@ def extract_questions_anthropic( return questions -def anthropic_send_message_to_model(messages, api_key, model, response_type="text", tracer={}): +def anthropic_send_message_to_model(messages, api_key, model, response_type="text", deepthought=False, tracer={}): """ Send message to model """ @@ -135,6 +135,7 @@ def anthropic_send_message_to_model(messages, api_key, model, response_type="tex model_name=model, api_key=api_key, response_type=response_type, + deepthought=deepthought, tracer=tracer, ) @@ -145,7 +146,7 @@ def converse_anthropic( online_results: Optional[Dict[str, Dict]] = None, code_results: Optional[Dict[str, Dict]] = None, conversation_log={}, - model: Optional[str] = "claude-3-5-sonnet-20241022", + model: Optional[str] = "claude-3-7-sonnet-latest", api_key: Optional[str] = None, completion_func=None, conversation_commands=[ConversationCommand.Default], @@ -160,6 +161,7 @@ def converse_anthropic( generated_files: List[FileAttachment] = None, program_execution_context: Optional[List[str]] = None, generated_asset_results: Dict[str, Dict] = {}, + deepthought: Optional[bool] = False, tracer: dict = {}, ): """ @@ -239,5 +241,6 @@ def converse_anthropic( system_prompt=system_prompt, completion_func=completion_func, max_prompt_size=max_prompt_size, + deepthought=deepthought, tracer=tracer, ) diff --git a/src/khoj/processor/conversation/anthropic/utils.py b/src/khoj/processor/conversation/anthropic/utils.py index 3fee8b43..b022d0e2 100644 --- a/src/khoj/processor/conversation/anthropic/utils.py +++ b/src/khoj/processor/conversation/anthropic/utils.py @@ -17,10 +17,8 @@ from khoj.processor.conversation.utils import ( commit_conversation_trace, get_image_from_url, ) -from khoj.utils import state from khoj.utils.helpers import ( get_chat_usage_metrics, - in_debug_mode, is_none_or_empty, is_promptrace_enabled, ) @@ -30,7 +28,8 @@ logger = logging.getLogger(__name__) anthropic_clients: Dict[str, anthropic.Anthropic] = {} -DEFAULT_MAX_TOKENS_ANTHROPIC = 3000 +DEFAULT_MAX_TOKENS_ANTHROPIC = 8000 +MAX_REASONING_TOKENS_ANTHROPIC = 12000 @retry( @@ -42,12 +41,13 @@ DEFAULT_MAX_TOKENS_ANTHROPIC = 3000 def anthropic_completion_with_backoff( messages, system_prompt, - model_name, + model_name: str, temperature=0, api_key=None, model_kwargs=None, max_tokens=None, response_type="text", + deepthought=False, tracer={}, ) -> str: if api_key not in anthropic_clients: @@ -57,18 +57,24 @@ def anthropic_completion_with_backoff( client = anthropic_clients[api_key] formatted_messages = [{"role": message.role, "content": message.content} for message in messages] - if response_type == "json_object": - # Prefill model response with '{' to make it output a valid JSON object + aggregated_response = "" + if response_type == "json_object" and not deepthought: + # Prefill model response with '{' to make it output a valid JSON object. Not supported with extended thinking. formatted_messages += [{"role": "assistant", "content": "{"}] - - aggregated_response = "{" if response_type == "json_object" else "" - max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC + aggregated_response += "{" final_message = None model_kwargs = model_kwargs or dict() if system_prompt: model_kwargs["system"] = system_prompt + max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC + if deepthought and model_name.startswith("claude-3-7"): + model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC} + max_tokens += MAX_REASONING_TOKENS_ANTHROPIC + # Temperature control not supported when using extended thinking + temperature = 1.0 + with client.messages.stream( messages=formatted_messages, model=model_name, # type: ignore @@ -111,20 +117,41 @@ def anthropic_chat_completion_with_backoff( system_prompt, max_prompt_size=None, completion_func=None, + deepthought=False, model_kwargs=None, tracer={}, ): g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func) t = Thread( target=anthropic_llm_thread, - args=(g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size, model_kwargs, tracer), + args=( + g, + messages, + system_prompt, + model_name, + temperature, + api_key, + max_prompt_size, + deepthought, + model_kwargs, + tracer, + ), ) t.start() return g def anthropic_llm_thread( - g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size=None, model_kwargs=None, tracer={} + g, + messages, + system_prompt, + model_name, + temperature, + api_key, + max_prompt_size=None, + deepthought=False, + model_kwargs=None, + tracer={}, ): try: if api_key not in anthropic_clients: @@ -133,6 +160,14 @@ def anthropic_llm_thread( else: client: anthropic.Anthropic = anthropic_clients[api_key] + model_kwargs = model_kwargs or dict() + max_tokens = DEFAULT_MAX_TOKENS_ANTHROPIC + if deepthought and model_name.startswith("claude-3-7"): + model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC} + max_tokens += MAX_REASONING_TOKENS_ANTHROPIC + # Temperature control not supported when using extended thinking + temperature = 1.0 + formatted_messages: List[anthropic.types.MessageParam] = [ anthropic.types.MessageParam(role=message.role, content=message.content) for message in messages ] @@ -145,8 +180,8 @@ def anthropic_llm_thread( temperature=temperature, system=system_prompt, timeout=20, - max_tokens=DEFAULT_MAX_TOKENS_ANTHROPIC, - **(model_kwargs or dict()), + max_tokens=max_tokens, + **model_kwargs, ) as stream: for text in stream.text_stream: aggregated_response += text diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 695ad5e4..015bbe6f 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -61,6 +61,9 @@ model_to_prompt_size = { "gemini-1.5-pro": 60000, # Anthropic Models "claude-3-5-sonnet-20241022": 60000, + "claude-3-5-sonnet-latest": 60000, + "claude-3-7-sonnet-20250219": 60000, + "claude-3-7-sonnet-latest": 60000, "claude-3-5-haiku-20241022": 60000, # Offline Models "bartowski/Qwen2.5-14B-Instruct-GGUF": 20000, diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 8c8a009e..2bc392a0 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -1125,6 +1125,7 @@ async def send_message_to_model_wrapper( query: str, system_message: str = "", response_type: str = "text", + deepthought: bool = False, user: KhojUser = None, query_images: List[str] = None, context: str = "", @@ -1227,6 +1228,7 @@ async def send_message_to_model_wrapper( api_key=api_key, model=chat_model_name, response_type=response_type, + deepthought=deepthought, tracer=tracer, ) elif model_type == ChatModel.ModelType.GOOGLE: @@ -1425,11 +1427,13 @@ def generate_chat_response( ) query_to_run = q + deepthought = False if meta_research: query_to_run = f"{q}\n\n{meta_research}\n" compiled_references = [] online_results = {} code_results = {} + deepthought = True chat_model = ConversationAdapters.get_valid_chat_model(user, conversation, is_subscribed) vision_available = chat_model.vision_enabled @@ -1513,6 +1517,7 @@ def generate_chat_response( generated_files=raw_generated_files, generated_asset_results=generated_asset_results, program_execution_context=program_execution_context, + deepthought=deepthought, tracer=tracer, ) elif chat_model.model_type == ChatModel.ModelType.GOOGLE: diff --git a/src/khoj/routers/research.py b/src/khoj/routers/research.py index d67e6554..e040c928 100644 --- a/src/khoj/routers/research.py +++ b/src/khoj/routers/research.py @@ -95,6 +95,7 @@ async def apick_next_tool( query=query, context=function_planning_prompt, response_type="json_object", + deepthought=True, user=user, query_images=query_images, query_files=query_files, diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py index 74c06172..b3d14f18 100644 --- a/src/khoj/utils/constants.py +++ b/src/khoj/utils/constants.py @@ -48,6 +48,9 @@ model_to_cost: Dict[str, Dict[str, float]] = { "gemini-1.5-pro-002": {"input": 1.25, "output": 5.00}, "gemini-2.0-flash": {"input": 0.10, "output": 0.40}, # Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api_ - "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0}, "claude-3-5-haiku-20241022": {"input": 1.0, "output": 5.0}, + "claude-3-5-sonnet-20241022": {"input": 3.0, "output": 15.0}, + "claude-3-5-sonnet-latest": {"input": 3.0, "output": 15.0}, + "claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0}, + "claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0}, }