From 231aa1c0dfc73867e7bb3909a429513e8620a81f Mon Sep 17 00:00:00 2001
From: Debanjum <debanjum@gmail.com>
Date: Thu, 22 May 2025 14:57:53 -0700
Subject: [PATCH] Support claude 4 models. Engage reasoning, operator. Track
 costs etc.

- Engage reasoning when using claude 4 models
- Allow claude 4 models as monolithic operator agents
- Ease identifying which anthropic models can reason, operate GUIs
- Track costs, set default context window of claude 4 models
- Handle stop reason on calls to new claude 4 models
---
 pyproject.toml                                |  2 +-
 .../processor/conversation/anthropic/utils.py | 22 ++++++++++++--
 src/khoj/processor/conversation/utils.py      |  4 +++
 .../processor/operator/operate_browser.py     | 21 +++++++++++---
 .../operator/operator_agent_anthropic.py      | 29 +++++++++++++++----
 src/khoj/utils/constants.py                   |  6 ++++
 6 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 81fbea35..653dc936 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,7 +85,7 @@ dependencies = [
     "pytz ~= 2024.1",
     "cron-descriptor == 1.4.3",
     "django_apscheduler == 0.7.0",
-    "anthropic == 0.49.0",
+    "anthropic == 0.52.0",
     "docx2txt == 0.8",
     "google-genai == 1.11.0",
     "google-auth ~= 2.23.3",
diff --git a/src/khoj/processor/conversation/anthropic/utils.py b/src/khoj/processor/conversation/anthropic/utils.py
index eb6c53aa..c466223e 100644
--- a/src/khoj/processor/conversation/anthropic/utils.py
+++ b/src/khoj/processor/conversation/anthropic/utils.py
@@ -33,6 +33,7 @@ anthropic_async_clients: Dict[str, anthropic.AsyncAnthropic | anthropic.AsyncAnt
 
 DEFAULT_MAX_TOKENS_ANTHROPIC = 8000
 MAX_REASONING_TOKENS_ANTHROPIC = 12000
+REASONING_MODELS = ["claude-3-7", "claude-sonnet-4", "claude-opus-4"]
 
 
 @retry(
@@ -73,7 +74,7 @@ def anthropic_completion_with_backoff(
         model_kwargs["system"] = system_prompt
 
     max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
-    if deepthought and model_name.startswith("claude-3-7"):
+    if deepthought and is_reasoning_model(model_name):
         model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
         max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
         # Temperature control not supported when using extended thinking
@@ -139,7 +140,7 @@ async def anthropic_chat_completion_with_backoff(
 
     model_kwargs = model_kwargs or dict()
     max_tokens = DEFAULT_MAX_TOKENS_ANTHROPIC
-    if deepthought and model_name.startswith("claude-3-7"):
+    if deepthought and is_reasoning_model(model_name):
         model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
         max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
         # Temperature control not supported when using extended thinking
@@ -165,6 +166,19 @@ async def anthropic_chat_completion_with_backoff(
             if not response_started:
                 response_started = True
                 logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
+            if chunk.type == "message_delta":
+                if chunk.delta.stop_reason == "refusal":
+                    yield ResponseWithThought(
+                        response="...I'm sorry, but my safety filters prevent me from assisting with this query."
+                    )
+                elif chunk.delta.stop_reason == "max_tokens":
+                    yield ResponseWithThought(response="...I'm sorry, but I've hit my response length limit.")
+                if chunk.delta.stop_reason in ["refusal", "max_tokens"]:
+                    logger.warning(
+                        f"LLM Response Prevented for {model_name}: {chunk.delta.stop_reason}.\n"
+                        + f"Last Message by {messages[-1].role}: {messages[-1].content}"
+                    )
+                    break
             # Skip empty chunks
             if chunk.type != "content_block_delta":
                 continue
@@ -266,3 +280,7 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
     ]
 
     return formatted_messages, system_prompt
+
+
+def is_reasoning_model(model_name: str) -> bool:
+    return model_name.startswith(tuple(REASONING_MODELS))  # True if any known reasoning-model prefix matches
diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py
index 6e5d2fb6..ba978429 100644
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -73,6 +73,10 @@ model_to_prompt_size = {
     "claude-3-7-sonnet-20250219": 60000,
     "claude-3-7-sonnet-latest": 60000,
     "claude-3-5-haiku-20241022": 60000,
+    "claude-sonnet-4": 60000,
+    "claude-sonnet-4-20250514": 60000,
+    "claude-opus-4": 60000,
+    "claude-opus-4-20250514": 60000,
     # Offline Models
     "bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
diff --git a/src/khoj/processor/operator/operate_browser.py b/src/khoj/processor/operator/operate_browser.py
index f13fcdc3..9ed285a4 100644
--- a/src/khoj/processor/operator/operate_browser.py
+++ b/src/khoj/processor/operator/operate_browser.py
@@ -4,8 +4,6 @@ import logging
 import os
 from typing import Callable, List, Optional
 
-import requests
-
 from khoj.database.adapters import AgentAdapters, ConversationAdapters
 from khoj.database.models import Agent, ChatModel, KhojUser
 from khoj.processor.operator.operator_actions import *
@@ -49,9 +47,9 @@ async def operate_browser(
     # Initialize Agent
     max_iterations = int(os.getenv("KHOJ_OPERATOR_ITERATIONS", 40))
     operator_agent: OperatorAgent
-    if reasoning_model.name.startswith("gpt-4o"):
+    if is_operator_model(reasoning_model.name) == ChatModel.ModelType.OPENAI:
         operator_agent = OpenAIOperatorAgent(query, reasoning_model, max_iterations, tracer)
-    elif reasoning_model.name.startswith("claude-3-7-sonnet"):
+    elif is_operator_model(reasoning_model.name) == ChatModel.ModelType.ANTHROPIC:
         operator_agent = AnthropicOperatorAgent(query, reasoning_model, max_iterations, tracer)
     else:
         grounding_model_name = "ui-tars-1.5"
@@ -150,3 +148,18 @@ async def operate_browser(
         "result": user_input_message or response,
         "webpages": [{"link": url, "snippet": ""} for url in environment.visited_urls],
     }
+
+
+def is_operator_model(model: str) -> ChatModel.ModelType | None:
+    """Return the provider type of the operator model family `model` belongs to, else None."""
+    # Known operator-capable model name prefixes mapped to their provider type
+    prefix_to_provider = {
+        "gpt-4o": ChatModel.ModelType.OPENAI,
+        "claude-3-7-sonnet": ChatModel.ModelType.ANTHROPIC,
+        "claude-sonnet-4": ChatModel.ModelType.ANTHROPIC,
+        "claude-opus-4": ChatModel.ModelType.ANTHROPIC,
+        "ui-tars-1.5": ChatModel.ModelType.OFFLINE,
+    }
+    matches = (kind for prefix, kind in prefix_to_provider.items() if model.startswith(prefix))
+    # First matching prefix wins, in insertion order
+    return next(matches, None)  # type: ignore[return-value]
diff --git a/src/khoj/processor/operator/operator_agent_anthropic.py b/src/khoj/processor/operator/operator_agent_anthropic.py
index 3128f718..0b3c473d 100644
--- a/src/khoj/processor/operator/operator_agent_anthropic.py
+++ b/src/khoj/processor/operator/operator_agent_anthropic.py
@@ -3,10 +3,11 @@ import json
 import logging
 from copy import deepcopy
 from datetime import datetime
-from typing import Any, List, Optional, cast
+from typing import List, Optional, cast
 
 from anthropic.types.beta import BetaContentBlock
 
+from khoj.processor.conversation.anthropic.utils import is_reasoning_model
 from khoj.processor.operator.operator_actions import *
 from khoj.processor.operator.operator_agent_base import (
     AgentActResult,
@@ -25,8 +26,7 @@ class AnthropicOperatorAgent(OperatorAgent):
         client = get_anthropic_async_client(
             self.vision_model.ai_model_api.api_key, self.vision_model.ai_model_api.api_base_url
         )
-        tool_version = "2025-01-24"
-        betas = [f"computer-use-{tool_version}", "token-efficient-tools-2025-02-19"]
+        betas = self.model_default_headers()
         temperature = 1.0
         actions: List[OperatorAction] = []
         action_results: List[dict] = []
@@ -56,7 +56,7 @@ class AnthropicOperatorAgent(OperatorAgent):
 
         tools = [
             {
-                "type": f"computer_20250124",
+                "type": self.model_default_tool("computer"),
                 "name": "computer",
                 "display_width_px": 1024,
                 "display_height_px": 768,
@@ -78,7 +78,7 @@ class AnthropicOperatorAgent(OperatorAgent):
         ]
 
         thinking: dict[str, str | int] = {"type": "disabled"}
-        if self.vision_model.name.startswith("claude-3-7"):
+        if is_reasoning_model(self.vision_model.name):
             thinking = {"type": "enabled", "budget_tokens": 1024}
 
         messages_for_api = self._format_message_for_api(self.messages)
@@ -381,3 +381,22 @@ class AnthropicOperatorAgent(OperatorAgent):
             return None
 
         return coord
+
+    def model_default_tool(self, tool_type: Literal["computer", "editor", "terminal"]) -> str:
+        """Get the default tool of the specified type for the vision model.
+
+        Raises ValueError when the model and tool type combination is unsupported.
+        """
+        computer_use_models = ("claude-3-7-sonnet", "claude-sonnet-4", "claude-opus-4")
+        if tool_type == "computer" and self.vision_model.name.startswith(computer_use_models):
+            return "computer_20250124"
+        raise ValueError(f"Unsupported tool type for model '{self.vision_model.name}': {tool_type}")
+
+    def model_default_headers(self) -> list[str]:
+        """Get the default computer use beta headers for the vision model. Empty list for unrecognized models."""
+        model_name = self.vision_model.name
+        if model_name.startswith("claude-3-7-sonnet"):
+            return ["computer-use-2025-01-24", "token-efficient-tools-2025-02-19"]
+        if model_name.startswith(("claude-sonnet-4", "claude-opus-4")):
+            return ["computer-use-2025-01-24"]
+        return []
diff --git a/src/khoj/utils/constants.py b/src/khoj/utils/constants.py
index af67e0a1..3cbaf45f 100644
--- a/src/khoj/utils/constants.py
+++ b/src/khoj/utils/constants.py
@@ -63,6 +63,12 @@ model_to_cost: Dict[str, Dict[str, float]] = {
     "claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
     "claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
     "claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
+    "claude-sonnet-4": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
+    "claude-sonnet-4-20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
+    "claude-sonnet-4@20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
+    "claude-opus-4": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
+    "claude-opus-4-20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
+    "claude-opus-4@20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
     # Grok pricing: https://docs.x.ai/docs/models
     "grok-3": {"input": 3.0, "output": 15.0},
     "grok-3-latest": {"input": 3.0, "output": 15.0},