Support Claude 4 models: engage reasoning, enable operator use, and track costs.

- Engage reasoning when using claude 4 models
- Allow claude 4 models as monolithic operator agents
- Ease identifying which Anthropic models can reason and operate GUIs
- Track costs, set default context window of claude 4 models
- Handle stop reason on calls to new claude 4 models
This commit is contained in:
Debanjum
2025-05-22 14:57:53 -07:00
parent dca17591f3
commit 231aa1c0df
6 changed files with 72 additions and 12 deletions

View File

@@ -85,7 +85,7 @@ dependencies = [
"pytz ~= 2024.1",
"cron-descriptor == 1.4.3",
"django_apscheduler == 0.7.0",
"anthropic == 0.49.0",
"anthropic == 0.52.0",
"docx2txt == 0.8",
"google-genai == 1.11.0",
"google-auth ~= 2.23.3",

View File

@@ -33,6 +33,7 @@ anthropic_async_clients: Dict[str, anthropic.AsyncAnthropic | anthropic.AsyncAnt
DEFAULT_MAX_TOKENS_ANTHROPIC = 8000
MAX_REASONING_TOKENS_ANTHROPIC = 12000
REASONING_MODELS = ["claude-3-7", "claude-sonnet-4", "claude-opus-4"]
@retry(
@@ -73,7 +74,7 @@ def anthropic_completion_with_backoff(
model_kwargs["system"] = system_prompt
max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
if deepthought and model_name.startswith("claude-3-7"):
if deepthought and is_reasoning_model(model_name):
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
# Temperature control not supported when using extended thinking
@@ -139,7 +140,7 @@ async def anthropic_chat_completion_with_backoff(
model_kwargs = model_kwargs or dict()
max_tokens = DEFAULT_MAX_TOKENS_ANTHROPIC
if deepthought and model_name.startswith("claude-3-7"):
if deepthought and is_reasoning_model(model_name):
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
# Temperature control not supported when using extended thinking
@@ -165,6 +166,19 @@ async def anthropic_chat_completion_with_backoff(
if not response_started:
response_started = True
logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
if chunk.type == "message_delta":
if chunk.delta.stop_reason == "refusal":
yield ResponseWithThought(
response="...I'm sorry, but my safety filters prevent me from assisting with this query."
)
elif chunk.delta.stop_reason == "max_tokens":
yield ResponseWithThought(response="...I'm sorry, but I've hit my response length limit.")
if chunk.delta.stop_reason in ["refusal", "max_tokens"]:
logger.warning(
f"LLM Response Prevented for {model_name}: {chunk.delta.stop_reason}.\n"
+ f"Last Message by {messages[-1].role}: {messages[-1].content}"
)
break
# Skip empty chunks
if chunk.type != "content_block_delta":
continue
@@ -266,3 +280,7 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
]
return formatted_messages, system_prompt
def is_reasoning_model(model_name: str) -> bool:
    """Check whether the named Anthropic model supports extended thinking (reasoning)."""
    # str.startswith accepts a tuple of prefixes — one C-level call instead of an any() loop.
    return model_name.startswith(tuple(REASONING_MODELS))

View File

@@ -73,6 +73,10 @@ model_to_prompt_size = {
"claude-3-7-sonnet-20250219": 60000,
"claude-3-7-sonnet-latest": 60000,
"claude-3-5-haiku-20241022": 60000,
"claude-sonnet-4": 60000,
"claude-sonnet-4-20250514": 60000,
"claude-opus-4": 60000,
"claude-opus-4-20250514": 60000,
# Offline Models
"bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
"bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,

View File

@@ -4,8 +4,6 @@ import logging
import os
from typing import Callable, List, Optional
import requests
from khoj.database.adapters import AgentAdapters, ConversationAdapters
from khoj.database.models import Agent, ChatModel, KhojUser
from khoj.processor.operator.operator_actions import *
@@ -49,9 +47,9 @@ async def operate_browser(
# Initialize Agent
max_iterations = int(os.getenv("KHOJ_OPERATOR_ITERATIONS", 40))
operator_agent: OperatorAgent
if reasoning_model.name.startswith("gpt-4o"):
if is_operator_model(reasoning_model.name) == ChatModel.ModelType.OPENAI:
operator_agent = OpenAIOperatorAgent(query, reasoning_model, max_iterations, tracer)
elif reasoning_model.name.startswith("claude-3-7-sonnet"):
elif is_operator_model(reasoning_model.name) == ChatModel.ModelType.ANTHROPIC:
operator_agent = AnthropicOperatorAgent(query, reasoning_model, max_iterations, tracer)
else:
grounding_model_name = "ui-tars-1.5"
@@ -150,3 +148,18 @@ async def operate_browser(
"result": user_input_message or response,
"webpages": [{"link": url, "snippet": ""} for url in environment.visited_urls],
}
def is_operator_model(model: str) -> ChatModel.ModelType | None:
    """Return the provider type of an operator-capable model, or None.

    Matches the model name by prefix against the known set of models that can
    operate a GUI (computer use).

    Args:
        model: Chat model name, e.g. "claude-sonnet-4-20250514".

    Returns:
        The matching ChatModel.ModelType, or None if the model is not an
        operator model.
    """
    operator_models = {
        "gpt-4o": ChatModel.ModelType.OPENAI,
        "claude-3-7-sonnet": ChatModel.ModelType.ANTHROPIC,
        "claude-sonnet-4": ChatModel.ModelType.ANTHROPIC,
        "claude-opus-4": ChatModel.ModelType.ANTHROPIC,
        "ui-tars-1.5": ChatModel.ModelType.OFFLINE,
    }
    # Iterate items() directly to avoid a second dict lookup per prefix.
    for prefix, model_type in operator_models.items():
        if model.startswith(prefix):
            return model_type  # type: ignore[return-value]
    return None

View File

@@ -3,10 +3,11 @@ import json
import logging
from copy import deepcopy
from datetime import datetime
from typing import Any, List, Optional, cast
from typing import List, Optional, cast
from anthropic.types.beta import BetaContentBlock
from khoj.processor.conversation.anthropic.utils import is_reasoning_model
from khoj.processor.operator.operator_actions import *
from khoj.processor.operator.operator_agent_base import (
AgentActResult,
@@ -25,8 +26,7 @@ class AnthropicOperatorAgent(OperatorAgent):
client = get_anthropic_async_client(
self.vision_model.ai_model_api.api_key, self.vision_model.ai_model_api.api_base_url
)
tool_version = "2025-01-24"
betas = [f"computer-use-{tool_version}", "token-efficient-tools-2025-02-19"]
betas = self.model_default_headers()
temperature = 1.0
actions: List[OperatorAction] = []
action_results: List[dict] = []
@@ -56,7 +56,7 @@ class AnthropicOperatorAgent(OperatorAgent):
tools = [
{
"type": f"computer_20250124",
"type": self.model_default_tool("computer"),
"name": "computer",
"display_width_px": 1024,
"display_height_px": 768,
@@ -78,7 +78,7 @@ class AnthropicOperatorAgent(OperatorAgent):
]
thinking: dict[str, str | int] = {"type": "disabled"}
if self.vision_model.name.startswith("claude-3-7"):
if is_reasoning_model(self.vision_model.name):
thinking = {"type": "enabled", "budget_tokens": 1024}
messages_for_api = self._format_message_for_api(self.messages)
@@ -381,3 +381,22 @@ class AnthropicOperatorAgent(OperatorAgent):
return None
return coord
def model_default_tool(self, tool_type: Literal["computer", "editor", "terminal"]) -> str:
    """Get the default tool of the specified type for the configured vision model.

    Args:
        tool_type: Kind of Anthropic tool to resolve ("computer", "editor", "terminal").

    Returns:
        The Anthropic tool version string, e.g. "computer_20250124".

    Raises:
        ValueError: If no default tool of tool_type exists for the model.
    """
    # Claude 3.7 and the Claude 4 family currently share the same computer tool version,
    # so the previously duplicated branches are consolidated into one prefix check.
    supported_models = ("claude-3-7-sonnet", "claude-sonnet-4", "claude-opus-4")
    if tool_type == "computer" and self.vision_model.name.startswith(supported_models):
        return "computer_20250124"
    raise ValueError(f"Unsupported tool type for model '{self.vision_model.name}': {tool_type}")
def model_default_headers(self) -> list[str]:
    """Get the default computer-use beta headers for the configured vision model.

    Returns:
        Beta header strings to send with the API request; empty if the model
        has no computer-use betas.
    """
    model_name = self.vision_model.name
    if model_name.startswith("claude-3-7-sonnet"):
        # Claude 3.7 additionally supports token-efficient tool use.
        # (Dropped the pointless f-prefix: the literal has no placeholders.)
        return ["computer-use-2025-01-24", "token-efficient-tools-2025-02-19"]
    if model_name.startswith(("claude-sonnet-4", "claude-opus-4")):
        return ["computer-use-2025-01-24"]
    return []

View File

@@ -63,6 +63,12 @@ model_to_cost: Dict[str, Dict[str, float]] = {
"claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
"claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
"claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
"claude-sonnet-4": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
"claude-sonnet-4-20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
"claude-sonnet-4@20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
"claude-opus-4": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
"claude-opus-4-20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
"claude-opus-4@20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
# Grok pricing: https://docs.x.ai/docs/models
"grok-3": {"input": 3.0, "output": 15.0},
"grok-3-latest": {"input": 3.0, "output": 15.0},