mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 05:29:12 +00:00
Improve Claude context caching to reduce response cost and improve intelligence
Old thought messages are dropped by default by the Anthropic API. This change ensures old thoughts are kept, which should improve cache utilization and reduce costs. Keeping old thoughts may also improve model intelligence.
This commit is contained in:
@@ -85,7 +85,7 @@ dependencies = [
|
||||
"pytz ~= 2024.1",
|
||||
"cron-descriptor == 1.4.3",
|
||||
"django_apscheduler == 0.7.0",
|
||||
"anthropic == 0.52.0",
|
||||
"anthropic == 0.75.0",
|
||||
"docx2txt == 0.8",
|
||||
"google-genai == 1.52.0",
|
||||
"google-auth ~= 2.23.3",
|
||||
|
||||
@@ -103,11 +103,13 @@ def anthropic_completion_with_backoff(
|
||||
max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
|
||||
if deepthought and is_reasoning_model(model_name):
|
||||
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
|
||||
model_kwargs["betas"] = ["context-management-2025-06-27"]
|
||||
model_kwargs["context_management"] = {"edits": [{"type": "clear_thinking_20251015", "keep": "all"}]}
|
||||
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
|
||||
# Temperature control not supported when using extended thinking
|
||||
temperature = 1.0
|
||||
|
||||
with client.messages.stream(
|
||||
with client.beta.messages.stream(
|
||||
messages=formatted_messages,
|
||||
model=model_name, # type: ignore
|
||||
temperature=temperature,
|
||||
@@ -125,7 +127,7 @@ def anthropic_completion_with_backoff(
|
||||
final_message = stream.get_final_message()
|
||||
|
||||
# Track raw content of model response to reuse for cache hits in multi-turn chats
|
||||
raw_content = [item.model_dump() for item in final_message.content]
|
||||
raw_content = [item.model_dump(exclude_none=True) for item in final_message.content]
|
||||
|
||||
# Extract all tool calls if tools are enabled
|
||||
if tools:
|
||||
|
||||
18
uv.lock
generated
18
uv.lock
generated
@@ -112,20 +112,21 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "anthropic"
|
||||
version = "0.52.0"
|
||||
version = "0.75.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "anyio" },
|
||||
{ name = "distro" },
|
||||
{ name = "docstring-parser" },
|
||||
{ name = "httpx" },
|
||||
{ name = "jiter" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "sniffio" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/57/fd/8a9332f5baf352c272494a9d359863a53385a208954c1a7251a524071930/anthropic-0.52.0.tar.gz", hash = "sha256:f06bc924d7eb85f8a43fe587b875ff58b410d60251b7dc5f1387b322a35bd67b", size = 229372, upload-time = "2025-05-22T16:42:22.044Z" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/04/1f/08e95f4b7e2d35205ae5dcbb4ae97e7d477fc521c275c02609e2931ece2d/anthropic-0.75.0.tar.gz", hash = "sha256:e8607422f4ab616db2ea5baacc215dd5f028da99ce2f022e33c7c535b29f3dfb", size = 439565, upload-time = "2025-11-24T20:41:45.28Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a0/43/172c0031654908bbac2a87d356fff4de1b4947a9b14b9658540b69416417/anthropic-0.52.0-py3-none-any.whl", hash = "sha256:c026daa164f0e3bde36ce9cbdd27f5f1419fff03306be1e138726f42e6a7810f", size = 286076, upload-time = "2025-05-22T16:42:20Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/60/1c/1cd02b7ae64302a6e06724bf80a96401d5313708651d277b1458504a1730/anthropic-0.75.0-py3-none-any.whl", hash = "sha256:ea8317271b6c15d80225a9f3c670152746e88805a7a61e14d4a374577164965b", size = 388164, upload-time = "2025-11-24T20:41:43.587Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -599,6 +600,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632, upload-time = "2024-10-05T20:14:57.687Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "docstring-parser"
|
||||
version = "0.17.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "docx2txt"
|
||||
version = "0.8"
|
||||
@@ -1287,7 +1297,7 @@ prod = [
|
||||
[package.metadata]
|
||||
requires-dist = [
|
||||
{ name = "aiohttp", specifier = "~=3.9.0" },
|
||||
{ name = "anthropic", specifier = "==0.52.0" },
|
||||
{ name = "anthropic", specifier = "==0.75.0" },
|
||||
{ name = "anyio", specifier = "~=4.8.0" },
|
||||
{ name = "apscheduler", specifier = "~=3.10.0" },
|
||||
{ name = "authlib", specifier = "==1.6.5" },
|
||||
|
||||
Reference in New Issue
Block a user