Support claude 4 models. Engage reasoning, operator. Track costs etc.

- Engage reasoning when using claude 4 models
- Allow claude 4 models as monolithic operator agents
- Ease identifying which anthropic models can reason, operate GUIs
- Track costs, set default context window of claude 4 models
- Handle stop reason on calls to new claude 4 models
2026-03-09 05:39:12 +00:00 · 2025-05-22 14:57:53 -07:00
parent dca17591f3
commit 231aa1c0df
6 changed files with 72 additions and 12 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,7 +85,7 @@ dependencies = [
    "pytz ~= 2024.1",
    "cron-descriptor == 1.4.3",
    "django_apscheduler == 0.7.0",
-    "anthropic == 0.49.0",
+    "anthropic == 0.52.0",
    "docx2txt == 0.8",
    "google-genai == 1.11.0",
    "google-auth ~= 2.23.3",
--- a/src/khoj/processor/conversation/anthropic/utils.py
+++ b/src/khoj/processor/conversation/anthropic/utils.py
@@ -33,6 +33,7 @@ anthropic_async_clients: Dict[str, anthropic.AsyncAnthropic | anthropic.AsyncAnt
 DEFAULT_MAX_TOKENS_ANTHROPIC = 8000
 MAX_REASONING_TOKENS_ANTHROPIC = 12000
 REASONING_MODELS = ["claude-3-7", "claude-sonnet-4", "claude-opus-4"]
@retry(
@@ -73,7 +74,7 @@ def anthropic_completion_with_backoff(
        model_kwargs["system"] = system_prompt
    max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
-    if deepthought and model_name.startswith("claude-3-7"):
+    if deepthought and is_reasoning_model(model_name):
        model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
        max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
        # Temperature control not supported when using extended thinking
@@ -139,7 +140,7 @@ async def anthropic_chat_completion_with_backoff(
    model_kwargs = model_kwargs or dict()
    max_tokens = DEFAULT_MAX_TOKENS_ANTHROPIC
-    if deepthought and model_name.startswith("claude-3-7"):
+    if deepthought and is_reasoning_model(model_name):
        model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
        max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
        # Temperature control not supported when using extended thinking
@@ -165,6 +166,19 @@ async def anthropic_chat_completion_with_backoff(
            if not response_started:
                response_started = True
                logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
            if chunk.type == "message_delta":
                if chunk.delta.stop_reason == "refusal":
                    yield ResponseWithThought(
                        response="...I'm sorry, but my safety filters prevent me from assisting with this query."
                    )
                elif chunk.delta.stop_reason == "max_tokens":
                    yield ResponseWithThought(response="...I'm sorry, but I've hit my response length limit.")
                if chunk.delta.stop_reason in ["refusal", "max_tokens"]:
                    logger.warning(
                        f"LLM Response Prevented for {model_name}: {chunk.delta.stop_reason}.\n"
                        + f"Last Message by {messages[-1].role}: {messages[-1].content}"
                    )
                    break
            # Skip empty chunks
            if chunk.type != "content_block_delta":
                continue
@@ -266,3 +280,7 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
    ]
    return formatted_messages, system_prompt
 def is_reasoning_model(model_name: str) -> bool:
    """Return True when `model_name` begins with any prefix listed in REASONING_MODELS."""
    # str.startswith accepts a tuple of candidate prefixes, so a single call covers every family.
    reasoning_prefixes = tuple(REASONING_MODELS)
    return model_name.startswith(reasoning_prefixes)
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -73,6 +73,10 @@ model_to_prompt_size = {
    "claude-3-7-sonnet-20250219": 60000,
    "claude-3-7-sonnet-latest": 60000,
    "claude-3-5-haiku-20241022": 60000,
    "claude-sonnet-4": 60000,
    "claude-sonnet-4-20250514": 60000,
    "claude-opus-4": 60000,
    "claude-opus-4-20250514": 60000,
    # Offline Models
    "bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
--- a/src/khoj/processor/operator/operate_browser.py
+++ b/src/khoj/processor/operator/operate_browser.py
@@ -4,8 +4,6 @@ import logging
 import os
 from typing import Callable, List, Optional
 import requests
 from khoj.database.adapters import AgentAdapters, ConversationAdapters
 from khoj.database.models import Agent, ChatModel, KhojUser
 from khoj.processor.operator.operator_actions import *
@@ -49,9 +47,9 @@ async def operate_browser(
    # Initialize Agent
    max_iterations = int(os.getenv("KHOJ_OPERATOR_ITERATIONS", 40))
    operator_agent: OperatorAgent
-    if reasoning_model.name.startswith("gpt-4o"):
+    if is_operator_model(reasoning_model.name) == ChatModel.ModelType.OPENAI:
        operator_agent = OpenAIOperatorAgent(query, reasoning_model, max_iterations, tracer)
-    elif reasoning_model.name.startswith("claude-3-7-sonnet"):
+    elif is_operator_model(reasoning_model.name) == ChatModel.ModelType.ANTHROPIC:
        operator_agent = AnthropicOperatorAgent(query, reasoning_model, max_iterations, tracer)
    else:
        grounding_model_name = "ui-tars-1.5"
@@ -150,3 +148,18 @@ async def operate_browser(
        "result": user_input_message or response,
        "webpages": [{"link": url, "snippet": ""} for url in environment.visited_urls],
    }
 def is_operator_model(model: str) -> ChatModel.ModelType | None:
    """Map a model name to the model type of the operator model family it belongs to.

    The name is matched by prefix against the known operator model families,
    in the order they are listed below.

    Returns:
        The ChatModel.ModelType registered for the first matching prefix,
        or None when the name matches no known operator model family.
    """
    model_type_by_prefix = {
        "gpt-4o": ChatModel.ModelType.OPENAI,
        "claude-3-7-sonnet": ChatModel.ModelType.ANTHROPIC,
        "claude-sonnet-4": ChatModel.ModelType.ANTHROPIC,
        "claude-opus-4": ChatModel.ModelType.ANTHROPIC,
        "ui-tars-1.5": ChatModel.ModelType.OFFLINE,
    }
    matching_types = (
        model_type for prefix, model_type in model_type_by_prefix.items() if model.startswith(prefix)
    )
    # First match wins; fall back to None when nothing matches.
    return next(matching_types, None)
--- a/src/khoj/processor/operator/operator_agent_anthropic.py
+++ b/src/khoj/processor/operator/operator_agent_anthropic.py
@@ -3,10 +3,11 @@ import json
 import logging
 from copy import deepcopy
 from datetime import datetime
-from typing import Any, List, Optional, cast
+from typing import List, Optional, cast
 from anthropic.types.beta import BetaContentBlock
 from khoj.processor.conversation.anthropic.utils import is_reasoning_model
 from khoj.processor.operator.operator_actions import *
 from khoj.processor.operator.operator_agent_base import (
    AgentActResult,
@@ -25,8 +26,7 @@ class AnthropicOperatorAgent(OperatorAgent):
        client = get_anthropic_async_client(
            self.vision_model.ai_model_api.api_key, self.vision_model.ai_model_api.api_base_url
        )
-        tool_version = "2025-01-24"
-        betas = [f"computer-use-{tool_version}", "token-efficient-tools-2025-02-19"]
+        betas = self.model_default_headers()
        temperature = 1.0
        actions: List[OperatorAction] = []
        action_results: List[dict] = []
@@ -56,7 +56,7 @@ class AnthropicOperatorAgent(OperatorAgent):
        tools = [
            {
-                "type": f"computer_20250124",
+                "type": self.model_default_tool("computer"),
                "name": "computer",
                "display_width_px": 1024,
                "display_height_px": 768,
@@ -78,7 +78,7 @@ class AnthropicOperatorAgent(OperatorAgent):
        ]
        thinking: dict[str, str | int] = {"type": "disabled"}
-        if self.vision_model.name.startswith("claude-3-7"):
+        if is_reasoning_model(self.vision_model.name):
            thinking = {"type": "enabled", "budget_tokens": 1024}
        messages_for_api = self._format_message_for_api(self.messages)
@@ -381,3 +381,22 @@ class AnthropicOperatorAgent(OperatorAgent):
            return None
        return coord
    def model_default_tool(self, tool_type: Literal["computer", "editor", "terminal"]) -> str:
        """Return the tool identifier to use for `tool_type` with the current vision model.

        Only the "computer" tool is mapped so far, and only for model names
        starting with claude-3-7-sonnet, claude-sonnet-4 or claude-opus-4.

        Raises:
            ValueError: If no tool is mapped for this model and tool type.
        """
        model_name = self.vision_model.name
        # Each entry pairs the name prefixes of one model family with its computer tool.
        computer_tool_by_family = (
            (("claude-3-7-sonnet",), "computer_20250124"),
            (("claude-sonnet-4", "claude-opus-4"), "computer_20250124"),
        )
        for family_prefixes, computer_tool in computer_tool_by_family:
            if not model_name.startswith(family_prefixes):
                continue
            if tool_type == "computer":
                return computer_tool
            # Known family but unmapped tool type: stop looking and report it below.
            break
        raise ValueError(f"Unsupported tool type for model '{model_name}': {tool_type}")
    def model_default_headers(self) -> list[str]:
        """Return the default computer use header values for the current vision model.

        Model names starting with claude-3-7-sonnet get both the computer-use
        and the token-efficient-tools values; claude-sonnet-4 and claude-opus-4
        names get only the computer-use value. Any other model gets an empty list.
        """
        model_name = self.vision_model.name
        if model_name.startswith("claude-3-7-sonnet"):
            return ["computer-use-2025-01-24", "token-efficient-tools-2025-02-19"]
        if model_name.startswith(("claude-sonnet-4", "claude-opus-4")):
            return ["computer-use-2025-01-24"]
        return []
--- a/src/khoj/utils/constants.py
+++ b/src/khoj/utils/constants.py
@@ -63,6 +63,12 @@ model_to_cost: Dict[str, Dict[str, float]] = {
    "claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
    "claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
    "claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
    "claude-sonnet-4": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
    "claude-sonnet-4-20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
    "claude-sonnet-4@20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
    "claude-opus-4": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
    "claude-opus-4-20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
    "claude-opus-4@20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
    # Grok pricing: https://docs.x.ai/docs/models
    "grok-3": {"input": 3.0, "output": 15.0},
    "grok-3-latest": {"input": 3.0, "output": 15.0},