mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-09 05:39:12 +00:00
Cache messages to anthropic models from chat actors for efficiency
This commit is contained in:
@@ -231,7 +231,10 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
|
|||||||
else:
|
else:
|
||||||
system_prompt += message.content
|
system_prompt += message.content
|
||||||
messages.remove(message)
|
messages.remove(message)
|
||||||
system_prompt = None if is_none_or_empty(system_prompt) else system_prompt
|
if not is_none_or_empty(system_prompt):
|
||||||
|
system = [{"type": "text", "text": system_prompt, "cache_control": {"type": "ephemeral"}}]
|
||||||
|
else:
|
||||||
|
system = None
|
||||||
|
|
||||||
# Anthropic requires the first message to be a 'user' message
|
# Anthropic requires the first message to be a 'user' message
|
||||||
if len(messages) == 1:
|
if len(messages) == 1:
|
||||||
@@ -274,12 +277,32 @@ def format_messages_for_anthropic(messages: list[ChatMessage], system_prompt: st
|
|||||||
logger.error(f"Drop message with empty content as not supported:\n{message}")
|
logger.error(f"Drop message with empty content as not supported:\n{message}")
|
||||||
messages.remove(message)
|
messages.remove(message)
|
||||||
continue
|
continue
|
||||||
|
if isinstance(message.content, str):
|
||||||
|
message.content = [{"type": "text", "text": message.content}]
|
||||||
|
|
||||||
|
# Add cache control to enable prompt caching for conversations with sufficient context
|
||||||
|
# Only add caching if we have multiple messages to make it worthwhile
|
||||||
|
if len(messages) > 2:
|
||||||
|
# Remove any existing cache controls from previous messages
|
||||||
|
for message in messages: # All except the last message
|
||||||
|
if isinstance(message.content, list):
|
||||||
|
for block in message.content:
|
||||||
|
if isinstance(block, dict) and "cache_control" in block:
|
||||||
|
del block["cache_control"]
|
||||||
|
|
||||||
|
# Add cache control to the last content block of second to last message.
|
||||||
|
# In research mode, this message content is list of iterations, updated after each research iteration.
|
||||||
|
# Caching it should improve research efficiency.
|
||||||
|
cache_message = messages[-2]
|
||||||
|
if isinstance(cache_message.content, list) and cache_message.content:
|
||||||
|
# Add cache control to the last content block
|
||||||
|
cache_message.content[-1]["cache_control"] = {"type": "ephemeral"}
|
||||||
|
|
||||||
formatted_messages: List[anthropic.types.MessageParam] = [
|
formatted_messages: List[anthropic.types.MessageParam] = [
|
||||||
anthropic.types.MessageParam(role=message.role, content=message.content) for message in messages
|
anthropic.types.MessageParam(role=message.role, content=message.content) for message in messages
|
||||||
]
|
]
|
||||||
|
|
||||||
return formatted_messages, system_prompt
|
return formatted_messages, system
|
||||||
|
|
||||||
|
|
||||||
def is_reasoning_model(model_name: str) -> bool:
|
def is_reasoning_model(model_name: str) -> bool:
|
||||||
|
|||||||
Reference in New Issue
Block a user