From 181332dcb8c2ec24a32cc2dcb49f26b80c3f6a5c Mon Sep 17 00:00:00 2001 From: Debanjum Date: Fri, 5 Dec 2025 11:45:10 -0800 Subject: [PATCH] Improve Claude context caching to reduce response cost, improve intelligence Old thought messages are dropped by default by the Anthropic API. This change ensures old thoughts are kept. This should improve cache utilization to reduce costs. And keeping old thoughts may also improve model intelligence. --- pyproject.toml | 2 +- .../processor/conversation/anthropic/utils.py | 6 ++++-- uv.lock | 18 ++++++++++++++---- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3015ce43..712f8ccf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,7 @@ dependencies = [ "pytz ~= 2024.1", "cron-descriptor == 1.4.3", "django_apscheduler == 0.7.0", - "anthropic == 0.52.0", + "anthropic == 0.75.0", "docx2txt == 0.8", "google-genai == 1.52.0", "google-auth ~= 2.23.3", diff --git a/src/khoj/processor/conversation/anthropic/utils.py b/src/khoj/processor/conversation/anthropic/utils.py index 42a8a668..cf0fe0dc 100644 --- a/src/khoj/processor/conversation/anthropic/utils.py +++ b/src/khoj/processor/conversation/anthropic/utils.py @@ -103,11 +103,13 @@ def anthropic_completion_with_backoff( max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC if deepthought and is_reasoning_model(model_name): model_kwargs["thinking"] = {"type": "enabled", "budget_tokens": MAX_REASONING_TOKENS_ANTHROPIC} + model_kwargs["betas"] = ["context-management-2025-06-27"] + model_kwargs["context_management"] = {"edits": [{"type": "clear_thinking_20251015", "keep": "all"}]} max_tokens += MAX_REASONING_TOKENS_ANTHROPIC # Temperature control not supported when using extended thinking temperature = 1.0 - with client.messages.stream( + with client.beta.messages.stream( messages=formatted_messages, model=model_name, # type: ignore temperature=temperature, @@ -125,7 +127,7 @@ def anthropic_completion_with_backoff( final_message = 
stream.get_final_message() # Track raw content of model response to reuse for cache hits in multi-turn chats - raw_content = [item.model_dump() for item in final_message.content] + raw_content = [item.model_dump(exclude_none=True) for item in final_message.content] # Extract all tool calls if tools are enabled if tools: diff --git a/uv.lock b/uv.lock index 6b31a20d..ad326da4 100644 --- a/uv.lock +++ b/uv.lock @@ -112,20 +112,21 @@ wheels = [ [[package]] name = "anthropic" -version = "0.52.0" +version = "0.75.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, { name = "distro" }, + { name = "docstring-parser" }, { name = "httpx" }, { name = "jiter" }, { name = "pydantic" }, { name = "sniffio" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/57/fd/8a9332f5baf352c272494a9d359863a53385a208954c1a7251a524071930/anthropic-0.52.0.tar.gz", hash = "sha256:f06bc924d7eb85f8a43fe587b875ff58b410d60251b7dc5f1387b322a35bd67b", size = 229372, upload-time = "2025-05-22T16:42:22.044Z" } +sdist = { url = "https://files.pythonhosted.org/packages/04/1f/08e95f4b7e2d35205ae5dcbb4ae97e7d477fc521c275c02609e2931ece2d/anthropic-0.75.0.tar.gz", hash = "sha256:e8607422f4ab616db2ea5baacc215dd5f028da99ce2f022e33c7c535b29f3dfb", size = 439565, upload-time = "2025-11-24T20:41:45.28Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/43/172c0031654908bbac2a87d356fff4de1b4947a9b14b9658540b69416417/anthropic-0.52.0-py3-none-any.whl", hash = "sha256:c026daa164f0e3bde36ce9cbdd27f5f1419fff03306be1e138726f42e6a7810f", size = 286076, upload-time = "2025-05-22T16:42:20Z" }, + { url = "https://files.pythonhosted.org/packages/60/1c/1cd02b7ae64302a6e06724bf80a96401d5313708651d277b1458504a1730/anthropic-0.75.0-py3-none-any.whl", hash = "sha256:ea8317271b6c15d80225a9f3c670152746e88805a7a61e14d4a374577164965b", size = 388164, upload-time = "2025-11-24T20:41:43.587Z" }, ] [[package]] @@ -599,6 +600,15 @@ wheels = 
[ { url = "https://files.pythonhosted.org/packages/68/1b/e0a87d256e40e8c888847551b20a017a6b98139178505dc7ffb96f04e954/dnspython-2.7.0-py3-none-any.whl", hash = "sha256:b4c34b7d10b51bcc3a5071e7b8dee77939f1e878477eeecc965e9835f63c6c86", size = 313632, upload-time = "2024-10-05T20:14:57.687Z" }, ] +[[package]] +name = "docstring-parser" +version = "0.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = "sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, +] + [[package]] name = "docx2txt" version = "0.8" @@ -1287,7 +1297,7 @@ prod = [ [package.metadata] requires-dist = [ { name = "aiohttp", specifier = "~=3.9.0" }, - { name = "anthropic", specifier = "==0.52.0" }, + { name = "anthropic", specifier = "==0.75.0" }, { name = "anyio", specifier = "~=4.8.0" }, { name = "apscheduler", specifier = "~=3.10.0" }, { name = "authlib", specifier = "==1.6.5" },