From 231aa1c0dfc73867e7bb3909a429513e8620a81f Mon Sep 17 00:00:00 2001 From: Debanjum Date: Thu, 22 May 2025 14:57:53 -0700 Subject: [PATCH] Support claude 4 models. Engage reasoning, operator. Track costs etc. - Engage reasoning when using claude 4 models - Allow claude 4 models as monolithic operator agents - Ease identifying which anthropic models can reason, operate GUIs - Track costs, set default context window of claude 4 models - Handle stop reason on calls to new claude 4 models --- pyproject.toml | 2 +- .../processor/conversation/anthropic/utils.py | 22 ++++++++++++-- src/khoj/processor/conversation/utils.py | 4 +++ .../processor/operator/operate_browser.py | 21 +++++++++++--- .../operator/operator_agent_anthropic.py | 29 +++++++++++++++---- src/khoj/utils/constants.py | 6 ++++ 6 files changed, 72 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 81fbea35..653dc936 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,7 @@ dependencies = [ "pytz ~= 2024.1", "cron-descriptor == 1.4.3", "django_apscheduler == 0.7.0", - "anthropic == 0.49.0", + "anthropic == 0.52.0", "docx2txt == 0.8", "google-genai == 1.11.0", "google-auth ~= 2.23.3", diff --git a/src/khoj/processor/conversation/anthropic/utils.py b/src/khoj/processor/conversation/anthropic/utils.py index eb6c53aa..c466223e 100644 --- a/src/khoj/processor/conversation/anthropic/utils.py +++ b/src/khoj/processor/conversation/anthropic/utils.py @@ -33,6 +33,7 @@ anthropic_async_clients: Dict[str, anthropic.AsyncAnthropic | anthropic.AsyncAnt DEFAULT_MAX_TOKENS_ANTHROPIC = 8000 MAX_REASONING_TOKENS_ANTHROPIC = 12000 +REASONING_MODELS = ["claude-3-7", "claude-sonnet-4", "claude-opus-4"] @retry( @@ -73,7 +74,7 @@ def anthropic_completion_with_backoff( model_kwargs["system"] = system_prompt max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC - if deepthought and model_name.startswith("claude-3-7"): + if deepthought and is_reasoning_model(model_name): model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC} max_tokens += MAX_REASONING_TOKENS_ANTHROPIC # Temperature control not supported when using extended thinking @@ -139,7 +140,7 @@ async def anthropic_chat_completion_with_backoff( model_kwargs = model_kwargs or dict() max_tokens = DEFAULT_MAX_TOKENS_ANTHROPIC - if deepthought and model_name.startswith("claude-3-7"): + if deepthought and is_reasoning_model(model_name): model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC} max_tokens += MAX_REASONING_TOKENS_ANTHROPIC # Temperature control not supported when using extended thinking @@ -165,6 +166,19 @@ async def anthropic_chat_completion_with_backoff( if not response_started: response_started = True logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds") + if chunk.type == "message_delta": + if chunk.delta.stop_reason == "refusal": + yield ResponseWithThought( + response="...I'm sorry, but my safety filters prevent me from assisting with this query." + ) + elif chunk.delta.stop_reason == "max_tokens": + yield ResponseWithThought(response="...I'm sorry, but I've hit my response length limit.") + if chunk.delta.stop_reason in ["refusal", "max_tokens"]: + logger.warning( + f"LLM Response Prevented for {model_name}: {chunk.delta.stop_reason}.\n" + + f"Last Message by {messages[-1].role}: {messages[-1].content}" + ) + break # Skip empty chunks if chunk.type != "content_block_delta": continue @@ -266,3 +280,7 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st ] return formatted_messages, system_prompt + + +def is_reasoning_model(model_name: str) -> bool: + return any(model_name.startswith(model) for model in REASONING_MODELS) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 6e5d2fb6..ba978429 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -73,6 +73,10 @@ model_to_prompt_size = { "claude-3-7-sonnet-20250219": 60000, "claude-3-7-sonnet-latest": 60000, "claude-3-5-haiku-20241022": 60000, + "claude-sonnet-4": 60000, + "claude-sonnet-4-20250514": 60000, + "claude-opus-4": 60000, + "claude-opus-4-20250514": 60000, # Offline Models "bartowski/Qwen2.5-14B-Instruct-GGUF": 20000, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000, diff --git a/src/khoj/processor/operator/operate_browser.py b/src/khoj/processor/operator/operate_browser.py index f13fcdc3..9ed285a4 100644 --- a/src/khoj/processor/operator/operate_browser.py +++ b/src/khoj/processor/operator/operate_browser.py @@ -4,8 +4,6 @@ import logging import os from typing import Callable, List, Optional -import requests - from khoj.database.adapters import AgentAdapters, ConversationAdapters from khoj.database.models import Agent, ChatModel, KhojUser from khoj.processor.operator.operator_actions import * @@ -49,9 +47,9 @@ async def operate_browser( # Initialize Agent max_iterations = int(os.getenv("KHOJ_OPERATOR_ITERATIONS", 40)) operator_agent: OperatorAgent - if reasoning_model.name.startswith("gpt-4o"): + if is_operator_model(reasoning_model.name) == ChatModel.ModelType.OPENAI: operator_agent = OpenAIOperatorAgent(query, reasoning_model, max_iterations, tracer) - elif reasoning_model.name.startswith("claude-3-7-sonnet"): + elif is_operator_model(reasoning_model.name) == ChatModel.ModelType.ANTHROPIC: operator_agent = AnthropicOperatorAgent(query, reasoning_model, max_iterations, tracer) else: grounding_model_name = "ui-tars-1.5" @@ -150,3 +148,18 @@ async def operate_browser( "result": user_input_message or response, "webpages": [{"link": url, "snippet": ""} for url in environment.visited_urls], } + + +def is_operator_model(model: str) -> ChatModel.ModelType | None: + """Check if the model is an operator model.""" + operator_models = { + "gpt-4o": ChatModel.ModelType.OPENAI, + "claude-3-7-sonnet": ChatModel.ModelType.ANTHROPIC, + "claude-sonnet-4": ChatModel.ModelType.ANTHROPIC, + "claude-opus-4": ChatModel.ModelType.ANTHROPIC, + "ui-tars-1.5": ChatModel.ModelType.OFFLINE, + } + for operator_model in operator_models: + if model.startswith(operator_model): + return operator_models[operator_model] # type: ignore[return-value] + return None diff --git a/src/khoj/processor/operator/operator_agent_anthropic.py b/src/khoj/processor/operator/operator_agent_anthropic.py index 3128f718..0b3c473d 100644 --- a/src/khoj/processor/operator/operator_agent_anthropic.py +++ b/src/khoj/processor/operator/operator_agent_anthropic.py @@ -3,10 +3,11 @@ import json import logging from copy import deepcopy from datetime import datetime -from typing import Any, List, Optional, cast +from typing import List, Optional, cast from anthropic.types.beta import BetaContentBlock +from khoj.processor.conversation.anthropic.utils import is_reasoning_model from khoj.processor.operator.operator_actions import * from khoj.processor.operator.operator_agent_base import ( AgentActResult, @@ -25,8 +26,7 @@ class AnthropicOperatorAgent(OperatorAgent): client = get_anthropic_async_client( self.vision_model.ai_model_api.api_key, self.vision_model.ai_model_api.api_base_url ) - tool_version = "2025-01-24" - betas = [f"computer-use-{tool_version}", "token-efficient-tools-2025-02-19"] + betas = self.model_default_headers() temperature = 1.0 actions: List[OperatorAction] = [] action_results: List[dict] = [] @@ -56,7 +56,7 @@ class AnthropicOperatorAgent(OperatorAgent): tools = [ { - "type": f"computer_20250124", + "type": self.model_default_tool("computer"), "name": "computer", "display_width_px": 1024, "display_height_px": 768, @@ -78,7 +78,7 @@ class AnthropicOperatorAgent(OperatorAgent): ] thinking: dict[str, str | int] = {"type": "disabled"} - if self.vision_model.name.startswith("claude-3-7"): + if is_reasoning_model(self.vision_model.name): thinking = {"type": "enabled", "budget_tokens": 1024} messages_for_api = self._format_message_for_api(self.messages) @@ -381,3 +381,22 @@ class AnthropicOperatorAgent(OperatorAgent): return None return coord + + def model_default_tool(self, tool_type: Literal["computer", "editor", "terminal"]) -> str: + """Get the default tool of specified type for the given model.""" + if self.vision_model.name.startswith("claude-3-7-sonnet"): + if tool_type == "computer": + return "computer_20250124" + elif self.vision_model.name.startswith("claude-sonnet-4") or self.vision_model.name.startswith("claude-opus-4"): + if tool_type == "computer": + return "computer_20250124" + raise ValueError(f"Unsupported tool type for model '{self.vision_model.name}': {tool_type}") + + def model_default_headers(self) -> list[str]: + """Get the default computer use headers for the given model.""" + if self.vision_model.name.startswith("claude-3-7-sonnet"): + return [f"computer-use-2025-01-24", "token-efficient-tools-2025-02-19"] + elif self.vision_model.name.startswith("claude-sonnet-4") or self.vision_model.name.startswith("claude-opus-4"): + return ["computer-use-2025-01-24"] + else: + return [] diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py index af67e0a1..3cbaf45f 100644 --- a/src/khoj/utils/constants.py +++ b/src/khoj/utils/constants.py @@ -63,6 +63,12 @@ model_to_cost: Dict[str, Dict[str, float]] = { "claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75}, "claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75}, "claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75}, + "claude-sonnet-4": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75}, + "claude-sonnet-4-20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75}, + "claude-sonnet-4@20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75}, + "claude-opus-4": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75}, + "claude-opus-4-20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75}, + "claude-opus-4@20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75}, # Grok pricing: https://docs.x.ai/docs/models "grok-3": {"input": 3.0, "output": 15.0}, "grok-3-latest": {"input": 3.0, "output": 15.0},