mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 05:39:12 +00:00
Support claude 4 models. Engage reasoning, operator. Track costs etc.
- Engage reasoning when using claude 4 models
- Allow claude 4 models as monolithic operator agents
- Ease identifying which anthropic models can reason, operate GUIs
- Track costs, set default context window of claude 4 models
- Handle stop reason on calls to new claude 4 models
This commit is contained in:
@@ -85,7 +85,7 @@ dependencies = [
|
|||||||
"pytz ~= 2024.1",
|
"pytz ~= 2024.1",
|
||||||
"cron-descriptor == 1.4.3",
|
"cron-descriptor == 1.4.3",
|
||||||
"django_apscheduler == 0.7.0",
|
"django_apscheduler == 0.7.0",
|
||||||
"anthropic == 0.49.0",
|
"anthropic == 0.52.0",
|
||||||
"docx2txt == 0.8",
|
"docx2txt == 0.8",
|
||||||
"google-genai == 1.11.0",
|
"google-genai == 1.11.0",
|
||||||
"google-auth ~= 2.23.3",
|
"google-auth ~= 2.23.3",
|
||||||
|
|||||||
@@ -33,6 +33,7 @@ anthropic_async_clients: Dict[str, anthropic.AsyncAnthropic | anthropic.AsyncAnt
|
|||||||
|
|
||||||
DEFAULT_MAX_TOKENS_ANTHROPIC = 8000
|
DEFAULT_MAX_TOKENS_ANTHROPIC = 8000
|
||||||
MAX_REASONING_TOKENS_ANTHROPIC = 12000
|
MAX_REASONING_TOKENS_ANTHROPIC = 12000
|
||||||
|
REASONING_MODELS = ["claude-3-7", "claude-sonnet-4", "claude-opus-4"]
|
||||||
|
|
||||||
|
|
||||||
@retry(
|
@retry(
|
||||||
@@ -73,7 +74,7 @@ def anthropic_completion_with_backoff(
|
|||||||
model_kwargs["system"] = system_prompt
|
model_kwargs["system"] = system_prompt
|
||||||
|
|
||||||
max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
|
max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
|
||||||
if deepthought and model_name.startswith("claude-3-7"):
|
if deepthought and is_reasoning_model(model_name):
|
||||||
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
|
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
|
||||||
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
|
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
|
||||||
# Temperature control not supported when using extended thinking
|
# Temperature control not supported when using extended thinking
|
||||||
@@ -139,7 +140,7 @@ async def anthropic_chat_completion_with_backoff(
|
|||||||
|
|
||||||
model_kwargs = model_kwargs or dict()
|
model_kwargs = model_kwargs or dict()
|
||||||
max_tokens = DEFAULT_MAX_TOKENS_ANTHROPIC
|
max_tokens = DEFAULT_MAX_TOKENS_ANTHROPIC
|
||||||
if deepthought and model_name.startswith("claude-3-7"):
|
if deepthought and is_reasoning_model(model_name):
|
||||||
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
|
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
|
||||||
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
|
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
|
||||||
# Temperature control not supported when using extended thinking
|
# Temperature control not supported when using extended thinking
|
||||||
@@ -165,6 +166,19 @@ async def anthropic_chat_completion_with_backoff(
|
|||||||
if not response_started:
|
if not response_started:
|
||||||
response_started = True
|
response_started = True
|
||||||
logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
|
logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
|
||||||
|
if chunk.type == "message_delta":
|
||||||
|
if chunk.delta.stop_reason == "refusal":
|
||||||
|
yield ResponseWithThought(
|
||||||
|
response="...I'm sorry, but my safety filters prevent me from assisting with this query."
|
||||||
|
)
|
||||||
|
elif chunk.delta.stop_reason == "max_tokens":
|
||||||
|
yield ResponseWithThought(response="...I'm sorry, but I've hit my response length limit.")
|
||||||
|
if chunk.delta.stop_reason in ["refusal", "max_tokens"]:
|
||||||
|
logger.warning(
|
||||||
|
f"LLM Response Prevented for {model_name}: {chunk.delta.stop_reason}.\n"
|
||||||
|
+ f"Last Message by {messages[-1].role}: {messages[-1].content}"
|
||||||
|
)
|
||||||
|
break
|
||||||
# Skip empty chunks
|
# Skip empty chunks
|
||||||
if chunk.type != "content_block_delta":
|
if chunk.type != "content_block_delta":
|
||||||
continue
|
continue
|
||||||
@@ -266,3 +280,7 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
|
|||||||
]
|
]
|
||||||
|
|
||||||
return formatted_messages, system_prompt
|
return formatted_messages, system_prompt
|
||||||
|
|
||||||
|
|
||||||
|
def is_reasoning_model(model_name: str) -> bool:
    """Tell whether `model_name` begins with one of the REASONING_MODELS prefixes.

    Args:
        model_name: Name of the chat model to check.

    Returns:
        True when the name starts with any entry of REASONING_MODELS, else False.
    """
    # str.startswith accepts a tuple of prefixes and checks them in one call.
    reasoning_prefixes = tuple(REASONING_MODELS)
    return model_name.startswith(reasoning_prefixes)
|
||||||
|
|||||||
@@ -73,6 +73,10 @@ model_to_prompt_size = {
|
|||||||
"claude-3-7-sonnet-20250219": 60000,
|
"claude-3-7-sonnet-20250219": 60000,
|
||||||
"claude-3-7-sonnet-latest": 60000,
|
"claude-3-7-sonnet-latest": 60000,
|
||||||
"claude-3-5-haiku-20241022": 60000,
|
"claude-3-5-haiku-20241022": 60000,
|
||||||
|
"claude-sonnet-4": 60000,
|
||||||
|
"claude-sonnet-4-20250514": 60000,
|
||||||
|
"claude-opus-4": 60000,
|
||||||
|
"claude-opus-4-20250514": 60000,
|
||||||
# Offline Models
|
# Offline Models
|
||||||
"bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
|
"bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
|
||||||
"bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
|
"bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
|
||||||
|
|||||||
@@ -4,8 +4,6 @@ import logging
|
|||||||
import os
|
import os
|
||||||
from typing import Callable, List, Optional
|
from typing import Callable, List, Optional
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from khoj.database.adapters import AgentAdapters, ConversationAdapters
|
from khoj.database.adapters import AgentAdapters, ConversationAdapters
|
||||||
from khoj.database.models import Agent, ChatModel, KhojUser
|
from khoj.database.models import Agent, ChatModel, KhojUser
|
||||||
from khoj.processor.operator.operator_actions import *
|
from khoj.processor.operator.operator_actions import *
|
||||||
@@ -49,9 +47,9 @@ async def operate_browser(
|
|||||||
# Initialize Agent
|
# Initialize Agent
|
||||||
max_iterations = int(os.getenv("KHOJ_OPERATOR_ITERATIONS", 40))
|
max_iterations = int(os.getenv("KHOJ_OPERATOR_ITERATIONS", 40))
|
||||||
operator_agent: OperatorAgent
|
operator_agent: OperatorAgent
|
||||||
if reasoning_model.name.startswith("gpt-4o"):
|
if is_operator_model(reasoning_model.name) == ChatModel.ModelType.OPENAI:
|
||||||
operator_agent = OpenAIOperatorAgent(query, reasoning_model, max_iterations, tracer)
|
operator_agent = OpenAIOperatorAgent(query, reasoning_model, max_iterations, tracer)
|
||||||
elif reasoning_model.name.startswith("claude-3-7-sonnet"):
|
elif is_operator_model(reasoning_model.name) == ChatModel.ModelType.ANTHROPIC:
|
||||||
operator_agent = AnthropicOperatorAgent(query, reasoning_model, max_iterations, tracer)
|
operator_agent = AnthropicOperatorAgent(query, reasoning_model, max_iterations, tracer)
|
||||||
else:
|
else:
|
||||||
grounding_model_name = "ui-tars-1.5"
|
grounding_model_name = "ui-tars-1.5"
|
||||||
@@ -150,3 +148,18 @@ async def operate_browser(
|
|||||||
"result": user_input_message or response,
|
"result": user_input_message or response,
|
||||||
"webpages": [{"link": url, "snippet": ""} for url in environment.visited_urls],
|
"webpages": [{"link": url, "snippet": ""} for url in environment.visited_urls],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def is_operator_model(model: str) -> ChatModel.ModelType | None:
    """Look up the operator model type for a model name by prefix.

    Args:
        model: Name of the model to classify.

    Returns:
        The ChatModel.ModelType registered for the first known prefix that
        `model` starts with, or None when no known prefix matches.
    """
    # Known operator model name prefixes and the model type each maps to.
    type_by_prefix = {
        "gpt-4o": ChatModel.ModelType.OPENAI,
        "claude-3-7-sonnet": ChatModel.ModelType.ANTHROPIC,
        "claude-sonnet-4": ChatModel.ModelType.ANTHROPIC,
        "claude-opus-4": ChatModel.ModelType.ANTHROPIC,
        "ui-tars-1.5": ChatModel.ModelType.OFFLINE,
    }
    # Prefixes are scanned in insertion order; the first match wins.
    return next(  # type: ignore[return-value]
        (model_type for prefix, model_type in type_by_prefix.items() if model.startswith(prefix)),
        None,
    )
|
||||||
|
|||||||
@@ -3,10 +3,11 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Any, List, Optional, cast
|
from typing import List, Optional, cast
|
||||||
|
|
||||||
from anthropic.types.beta import BetaContentBlock
|
from anthropic.types.beta import BetaContentBlock
|
||||||
|
|
||||||
|
from khoj.processor.conversation.anthropic.utils import is_reasoning_model
|
||||||
from khoj.processor.operator.operator_actions import *
|
from khoj.processor.operator.operator_actions import *
|
||||||
from khoj.processor.operator.operator_agent_base import (
|
from khoj.processor.operator.operator_agent_base import (
|
||||||
AgentActResult,
|
AgentActResult,
|
||||||
@@ -25,8 +26,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
|||||||
client = get_anthropic_async_client(
|
client = get_anthropic_async_client(
|
||||||
self.vision_model.ai_model_api.api_key, self.vision_model.ai_model_api.api_base_url
|
self.vision_model.ai_model_api.api_key, self.vision_model.ai_model_api.api_base_url
|
||||||
)
|
)
|
||||||
tool_version = "2025-01-24"
|
betas = self.model_default_headers()
|
||||||
betas = [f"computer-use-{tool_version}", "token-efficient-tools-2025-02-19"]
|
|
||||||
temperature = 1.0
|
temperature = 1.0
|
||||||
actions: List[OperatorAction] = []
|
actions: List[OperatorAction] = []
|
||||||
action_results: List[dict] = []
|
action_results: List[dict] = []
|
||||||
@@ -56,7 +56,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
|||||||
|
|
||||||
tools = [
|
tools = [
|
||||||
{
|
{
|
||||||
"type": f"computer_20250124",
|
"type": self.model_default_tool("computer"),
|
||||||
"name": "computer",
|
"name": "computer",
|
||||||
"display_width_px": 1024,
|
"display_width_px": 1024,
|
||||||
"display_height_px": 768,
|
"display_height_px": 768,
|
||||||
@@ -78,7 +78,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
|||||||
]
|
]
|
||||||
|
|
||||||
thinking: dict[str, str | int] = {"type": "disabled"}
|
thinking: dict[str, str | int] = {"type": "disabled"}
|
||||||
if self.vision_model.name.startswith("claude-3-7"):
|
if is_reasoning_model(self.vision_model.name):
|
||||||
thinking = {"type": "enabled", "budget_tokens": 1024}
|
thinking = {"type": "enabled", "budget_tokens": 1024}
|
||||||
|
|
||||||
messages_for_api = self._format_message_for_api(self.messages)
|
messages_for_api = self._format_message_for_api(self.messages)
|
||||||
@@ -381,3 +381,22 @@ class AnthropicOperatorAgent(OperatorAgent):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
return coord
|
return coord
|
||||||
|
|
||||||
|
def model_default_tool(self, tool_type: Literal["computer", "editor", "terminal"]) -> str:
    """Return the tool identifier this agent's vision model uses for `tool_type`.

    Only the "computer" tool is mapped, and only for vision model names that
    start with "claude-3-7-sonnet", "claude-sonnet-4" or "claude-opus-4".

    Args:
        tool_type: Kind of tool to look up.

    Returns:
        The tool identifier string, currently always "computer_20250124".

    Raises:
        ValueError: If the model name or the tool type has no mapping.
    """
    model_name = self.vision_model.name
    # Every supported model family currently shares the same computer tool version.
    supported_prefixes = ("claude-3-7-sonnet", "claude-sonnet-4", "claude-opus-4")
    if model_name.startswith(supported_prefixes) and tool_type == "computer":
        return "computer_20250124"
    raise ValueError(f"Unsupported tool type for model '{model_name}': {tool_type}")
|
||||||
|
|
||||||
|
def model_default_headers(self) -> list[str]:
    """Return the default computer use beta identifiers for this agent's vision model.

    Returns:
        - Names starting with "claude-3-7-sonnet": the computer use identifier
          plus the token-efficient tools identifier.
        - Names starting with "claude-sonnet-4" or "claude-opus-4": the
          computer use identifier only.
        - Any other name: an empty list.
    """
    model_name = self.vision_model.name
    if model_name.startswith("claude-3-7-sonnet"):
        return ["computer-use-2025-01-24", "token-efficient-tools-2025-02-19"]
    if model_name.startswith(("claude-sonnet-4", "claude-opus-4")):
        return ["computer-use-2025-01-24"]
    return []
|
||||||
|
|||||||
@@ -63,6 +63,12 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
|||||||
"claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
"claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||||
"claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
"claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||||
"claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
"claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||||
|
"claude-sonnet-4": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||||
|
"claude-sonnet-4-20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||||
|
"claude-sonnet-4@20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
|
||||||
|
"claude-opus-4": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
|
||||||
|
"claude-opus-4-20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
|
||||||
|
"claude-opus-4@20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
|
||||||
# Grok pricing: https://docs.x.ai/docs/models
|
# Grok pricing: https://docs.x.ai/docs/models
|
||||||
"grok-3": {"input": 3.0, "output": 15.0},
|
"grok-3": {"input": 3.0, "output": 15.0},
|
||||||
"grok-3-latest": {"input": 3.0, "output": 15.0},
|
"grok-3-latest": {"input": 3.0, "output": 15.0},
|
||||||
|
|||||||
Reference in New Issue
Block a user