mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Support Claude 4 models. Engage reasoning, allow operator use. Track costs etc.
- Engage reasoning when using Claude 4 models
- Allow Claude 4 models as monolithic operator agents
- Ease identifying which Anthropic models can reason and operate GUIs
- Track costs and set the default context window of Claude 4 models
- Handle stop reason on calls to new Claude 4 models
This commit is contained in:
@@ -85,7 +85,7 @@ dependencies = [
|
||||
"pytz ~= 2024.1",
|
||||
"cron-descriptor == 1.4.3",
|
||||
"django_apscheduler == 0.7.0",
|
||||
"anthropic == 0.49.0",
|
||||
"anthropic == 0.52.0",
|
||||
"docx2txt == 0.8",
|
||||
"google-genai == 1.11.0",
|
||||
"google-auth ~= 2.23.3",
|
||||
|
||||
@@ -33,6 +33,7 @@ anthropic_async_clients: Dict[str, anthropic.AsyncAnthropic | anthropic.AsyncAnt
|
||||
|
||||
DEFAULT_MAX_TOKENS_ANTHROPIC = 8000
|
||||
MAX_REASONING_TOKENS_ANTHROPIC = 12000
|
||||
REASONING_MODELS = ["claude-3-7", "claude-sonnet-4", "claude-opus-4"]
|
||||
|
||||
|
||||
@retry(
|
||||
@@ -73,7 +74,7 @@ def anthropic_completion_with_backoff(
|
||||
model_kwargs["system"] = system_prompt
|
||||
|
||||
max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
|
||||
if deepthought and model_name.startswith("claude-3-7"):
|
||||
if deepthought and is_reasoning_model(model_name):
|
||||
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
|
||||
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
|
||||
# Temperature control not supported when using extended thinking
|
||||
@@ -139,7 +140,7 @@ async def anthropic_chat_completion_with_backoff(
|
||||
|
||||
model_kwargs = model_kwargs or dict()
|
||||
max_tokens = DEFAULT_MAX_TOKENS_ANTHROPIC
|
||||
if deepthought and model_name.startswith("claude-3-7"):
|
||||
if deepthought and is_reasoning_model(model_name):
|
||||
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
|
||||
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
|
||||
# Temperature control not supported when using extended thinking
|
||||
@@ -165,6 +166,19 @@ async def anthropic_chat_completion_with_backoff(
|
||||
if not response_started:
|
||||
response_started = True
|
||||
logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
|
||||
if chunk.type == "message_delta":
|
||||
if chunk.delta.stop_reason == "refusal":
|
||||
yield ResponseWithThought(
|
||||
response="...I'm sorry, but my safety filters prevent me from assisting with this query."
|
||||
)
|
||||
elif chunk.delta.stop_reason == "max_tokens":
|
||||
yield ResponseWithThought(response="...I'm sorry, but I've hit my response length limit.")
|
||||
if chunk.delta.stop_reason in ["refusal", "max_tokens"]:
|
||||
logger.warning(
|
||||
f"LLM Response Prevented for {model_name}: {chunk.delta.stop_reason}.\n"
|
||||
+ f"Last Message by {messages[-1].role}: {messages[-1].content}"
|
||||
)
|
||||
break
|
||||
# Skip empty chunks
|
||||
if chunk.type != "content_block_delta":
|
||||
continue
|
||||
@@ -266,3 +280,7 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
|
||||
]
|
||||
|
||||
return formatted_messages, system_prompt
|
||||
|
||||
|
||||
def is_reasoning_model(model_name: str) -> bool:
    """Tell whether `model_name` begins with one of the prefixes in REASONING_MODELS."""
    # str.startswith accepts a tuple of candidate prefixes and checks them all in one call.
    reasoning_prefixes = tuple(REASONING_MODELS)
    return model_name.startswith(reasoning_prefixes)
|
||||
|
||||
@@ -73,6 +73,10 @@ model_to_prompt_size = {
|
||||
"claude-3-7-sonnet-20250219": 60000,
|
||||
"claude-3-7-sonnet-latest": 60000,
|
||||
"claude-3-5-haiku-20241022": 60000,
|
||||
"claude-sonnet-4": 60000,
|
||||
"claude-sonnet-4-20250514": 60000,
|
||||
"claude-opus-4": 60000,
|
||||
"claude-opus-4-20250514": 60000,
|
||||
# Offline Models
|
||||
"bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
|
||||
"bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
|
||||
|
||||
@@ -4,8 +4,6 @@ import logging
|
||||
import os
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from khoj.database.adapters import AgentAdapters, ConversationAdapters
|
||||
from khoj.database.models import Agent, ChatModel, KhojUser
|
||||
from khoj.processor.operator.operator_actions import *
|
||||
@@ -49,9 +47,9 @@ async def operate_browser(
|
||||
# Initialize Agent
|
||||
max_iterations = int(os.getenv("KHOJ_OPERATOR_ITERATIONS", 40))
|
||||
operator_agent: OperatorAgent
|
||||
if reasoning_model.name.startswith("gpt-4o"):
|
||||
if is_operator_model(reasoning_model.name) == ChatModel.ModelType.OPENAI:
|
||||
operator_agent = OpenAIOperatorAgent(query, reasoning_model, max_iterations, tracer)
|
||||
elif reasoning_model.name.startswith("claude-3-7-sonnet"):
|
||||
elif is_operator_model(reasoning_model.name) == ChatModel.ModelType.ANTHROPIC:
|
||||
operator_agent = AnthropicOperatorAgent(query, reasoning_model, max_iterations, tracer)
|
||||
else:
|
||||
grounding_model_name = "ui-tars-1.5"
|
||||
@@ -150,3 +148,18 @@ async def operate_browser(
|
||||
"result": user_input_message or response,
|
||||
"webpages": [{"link": url, "snippet": ""} for url in environment.visited_urls],
|
||||
}
|
||||
|
||||
|
||||
def is_operator_model(model: str) -> ChatModel.ModelType | None:
    """Look up which provider type handles `model` as an operator model.

    The model name is compared by prefix against the known operator model
    families, in the order they are listed below. Returns the provider type
    of the first matching family, or None when no family prefix matches.
    """
    # Model family prefix -> provider type of that family.
    provider_by_prefix = {
        "gpt-4o": ChatModel.ModelType.OPENAI,
        "claude-3-7-sonnet": ChatModel.ModelType.ANTHROPIC,
        "claude-sonnet-4": ChatModel.ModelType.ANTHROPIC,
        "claude-opus-4": ChatModel.ModelType.ANTHROPIC,
        "ui-tars-1.5": ChatModel.ModelType.OFFLINE,
    }
    matching_providers = (
        provider for prefix, provider in provider_by_prefix.items() if model.startswith(prefix)
    )
    return next(matching_providers, None)  # type: ignore[return-value]
|
||||
|
||||
@@ -3,10 +3,11 @@ import json
|
||||
import logging
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
from typing import Any, List, Optional, cast
|
||||
from typing import List, Optional, cast
|
||||
|
||||
from anthropic.types.beta import BetaContentBlock
|
||||
|
||||
from khoj.processor.conversation.anthropic.utils import is_reasoning_model
|
||||
from khoj.processor.operator.operator_actions import *
|
||||
from khoj.processor.operator.operator_agent_base import (
|
||||
AgentActResult,
|
||||
@@ -25,8 +26,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
||||
client = get_anthropic_async_client(
|
||||
self.vision_model.ai_model_api.api_key, self.vision_model.ai_model_api.api_base_url
|
||||
)
|
||||
tool_version = "2025-01-24"
|
||||
betas = [f"computer-use-{tool_version}", "token-efficient-tools-2025-02-19"]
|
||||
betas = self.model_default_headers()
|
||||
temperature = 1.0
|
||||
actions: List[OperatorAction] = []
|
||||
action_results: List[dict] = []
|
||||
@@ -56,7 +56,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
||||
|
||||
tools = [
|
||||
{
|
||||
"type": f"computer_20250124",
|
||||
"type": self.model_default_tool("computer"),
|
||||
"name": "computer",
|
||||
"display_width_px": 1024,
|
||||
"display_height_px": 768,
|
||||
@@ -78,7 +78,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
||||
]
|
||||
|
||||
thinking: dict[str, str | int] = {"type": "disabled"}
|
||||
if self.vision_model.name.startswith("claude-3-7"):
|
||||
if is_reasoning_model(self.vision_model.name):
|
||||
thinking = {"type": "enabled", "budget_tokens": 1024}
|
||||
|
||||
messages_for_api = self._format_message_for_api(self.messages)
|
||||
@@ -381,3 +381,22 @@ class AnthropicOperatorAgent(OperatorAgent):
|
||||
return None
|
||||
|
||||
return coord
|
||||
|
||||
def model_default_tool(self, tool_type: Literal["computer", "editor", "terminal"]) -> str:
    """Return the tool type identifier to request for `tool_type` with this agent's vision model.

    Only the "computer" tool has a default, and only for the model families
    listed below.

    Raises:
        ValueError: when the vision model is not one of the listed families,
            or the requested tool type has no default for it.
    """
    model_name = self.vision_model.name
    # Every family listed here currently shares the same computer tool version.
    families_with_computer_tool = ("claude-3-7-sonnet", "claude-sonnet-4", "claude-opus-4")
    if tool_type == "computer" and model_name.startswith(families_with_computer_tool):
        return "computer_20250124"
    raise ValueError(f"Unsupported tool type for model '{self.vision_model.name}': {tool_type}")
|
||||
|
||||
def model_default_headers(self) -> list[str]:
    """Return the computer-use beta header values for this agent's vision model.

    Models outside the families handled below get an empty list.
    """
    model_name = self.vision_model.name
    if model_name.startswith("claude-3-7-sonnet"):
        return ["computer-use-2025-01-24", "token-efficient-tools-2025-02-19"]
    if model_name.startswith(("claude-sonnet-4", "claude-opus-4")):
        return ["computer-use-2025-01-24"]
    return []
|
||||
|
||||
@@ -63,6 +63,12 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
||||
"claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||
"claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||
"claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||
"claude-sonnet-4": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||
"claude-sonnet-4-20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||
"claude-sonnet-4@20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||
"claude-opus-4": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
|
||||
"claude-opus-4-20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
|
||||
"claude-opus-4@20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
|
||||
# Grok pricing: https://docs.x.ai/docs/models
|
||||
"grok-3": {"input": 3.0, "output": 15.0},
|
||||
"grok-3-latest": {"input": 3.0, "output": 15.0},
|
||||
|
||||
Reference in New Issue
Block a user