Improve Claude context caching to reduce response cost and improve intelligence

By default, the Anthropic API drops thinking blocks from earlier turns.
This change keeps all old thoughts by using the `clear_thinking_20251015`
context-management edit with `keep: "all"`. This should improve cache
utilization and thereby reduce costs. Keeping old thoughts may also
improve model intelligence.
This commit is contained in:
Debanjum
2025-12-05 11:45:10 -08:00
parent 9c03af2735
commit 181332dcb8
3 changed files with 19 additions and 7 deletions

View File

@@ -85,7 +85,7 @@ dependencies = [
"pytz ~= 2024.1", "pytz ~= 2024.1",
"cron-descriptor == 1.4.3", "cron-descriptor == 1.4.3",
"django_apscheduler == 0.7.0", "django_apscheduler == 0.7.0",
"anthropic == 0.52.0", "anthropic == 0.75.0",
"docx2txt == 0.8", "docx2txt == 0.8",
"google-genai == 1.52.0", "google-genai == 1.52.0",
"google-auth ~= 2.23.3", "google-auth ~= 2.23.3",

View File

@@ -103,11 +103,13 @@ def anthropic_completion_with_backoff(
max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
if deepthought and is_reasoning_model(model_name): if deepthought and is_reasoning_model(model_name):
model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC} model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC}
model_kwargs["betas"] = ["context-management-2025-06-27"]
model_kwargs["context_management"] = {"edits": [{"type": "clear_thinking_20251015", "keep": "all"}]}
max_tokens += MAX_REASONING_TOKENS_ANTHROPIC max_tokens += MAX_REASONING_TOKENS_ANTHROPIC
# Temperature control not supported when using extended thinking # Temperature control not supported when using extended thinking
temperature = 1.0 temperature = 1.0
with client.messages.stream( with client.beta.messages.stream(
messages=formatted_messages, messages=formatted_messages,
model=model_name, # type: ignore model=model_name, # type: ignore
temperature=temperature, temperature=temperature,
@@ -125,7 +127,7 @@ def anthropic_completion_with_backoff(
final_message = stream.get_final_message() final_message = stream.get_final_message()
# Track raw content of model response to reuse for cache hits in multi-turn chats # Track raw content of model response to reuse for cache hits in multi-turn chats
raw_content = [item.model_dump() for item in final_message.content] raw_content = [item.model_dump(exclude_none=True) for item in final_message.content]
# Extract all tool calls if tools are enabled # Extract all tool calls if tools are enabled
if tools: if tools:

18
uv.lock generated
View File

@@ -112,20 +112,21 @@ wheels = [
[[package]] [[package]]
name = "anthropic" name = "anthropic"
version = "0.52.0" version = "0.75.0"
source = { registry = "https://pypi.org/simple" } source = { registry = "https://pypi.org/simple" }
dependencies = [ dependencies = [
{ name = "anyio" }, { name = "anyio" },
{ name = "distro" }, { name = "distro" },
{ name = "docstring-parser" },
{ name = "httpx" }, { name = "httpx" },
{ name = "jiter" }, { name = "jiter" },
{ name = "pydantic" }, { name = "pydantic" },
{ name = "sniffio" }, { name = "sniffio" },
{ name = "typing-extensions" }, { name = "typing-extensions" },
] ]
sdist = { url = "https://files.pythonhosted.org/packages/57/fd/8a9332f5baf352c272494a9d359863a53385a208954c1a7251a524071930/anthropic-0.52.0.tar.gz", hash = "sha256:f06bc924d7eb85f8a43fe587b875ff58b410d60251b7dc5f1387b322a35bd67b", size = 229372, upload-time = "2025-05-22T16:42:22.044Z" } sdist = { url = "https://files.pythonhosted.org/packages/04/1f/08e95f4b7e2d35205ae5dcbb4ae97e7d477fc521c275c02609e2931ece2d/anthropic-0.75.0.tar.gz", hash = "sha256:e8607422f4ab616db2ea5baacc215dd5f028da99ce2f022e33c7c535b29f3dfb", size = 439565, upload-time = "2025-11-24T20:41:45.28Z" }
wheels = [ wheels = [
{ url = "https://files.pythonhosted.org/packages/a0/43/172c0031654908bbac2a87d356fff4de1b4947a9b14b9658540b69416417/anthropic-0.52.0-py3-none-any.whl", hash = "sha256:c026daa164f0e3bde36ce9cbdd27f5f1419fff03306be1e138726f42e6a7810f", size = 286076, upload-time = "2025-05-22T16:42:20Z" }, { url = "https://files.pythonhosted.org/packages/60/1c/1cd02b7ae64302a6e06724bf80a96401d5313708651d277b1458504a1730/anthropic-0.75.0-py3-none-any.whl", hash = "sha256:ea8317271b6c15d80225a9f3c670152746e88805a7a61e14d4a374577164965b", size = 388164, upload-time = "2025-11-24T20:41:43.587Z" },
] ]
[[package]] [[package]]
@@ -599,6 +600,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632, upload-time = "2024-10-05T20:14:57.687Z" }, { url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632, upload-time = "2024-10-05T20:14:57.687Z" },
] ]
[[package]]
name = "docstring-parser"
version = "0.17.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" },
]
[[package]] [[package]]
name = "docx2txt" name = "docx2txt"
version = "0.8" version = "0.8"
@@ -1287,7 +1297,7 @@ prod = [
[package.metadata] [package.metadata]
requires-dist = [ requires-dist = [
{ name = "aiohttp", specifier = "~=3.9.0" }, { name = "aiohttp", specifier = "~=3.9.0" },
{ name = "anthropic", specifier = "==0.52.0" }, { name = "anthropic", specifier = "==0.75.0" },
{ name = "anyio", specifier = "~=4.8.0" }, { name = "anyio", specifier = "~=4.8.0" },
{ name = "apscheduler", specifier = "~=3.10.0" }, { name = "apscheduler", specifier = "~=3.10.0" },
{ name = "authlib", specifier = "==1.6.5" }, { name = "authlib", specifier = "==1.6.5" },