Do not overwrite system_prompt when retrying idempotent AI API calls

Previously, on a tenacity retry, the system_prompt could get overwritten.
This commit is contained in:
Debanjum
2025-05-30 15:43:25 -07:00
parent e0ea151f20
commit 0d1e6b0d53
2 changed files with 9 additions and 9 deletions

View File

@@ -60,7 +60,7 @@ def anthropic_completion_with_backoff(
client = get_anthropic_client(api_key, api_base_url)
anthropic_clients[api_key] = client
formatted_messages, system_prompt = format_messages_for_anthropic(messages, system_prompt)
formatted_messages, system = format_messages_for_anthropic(messages, system_prompt)
aggregated_response = ""
if response_type == "json_object" and not deepthought:
@@ -70,8 +70,8 @@ def anthropic_completion_with_backoff(
final_message = None
model_kwargs = model_kwargs or dict()
if system_prompt:
model_kwargs["system"] = system_prompt
if system:
model_kwargs["system"] = system
max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
if deepthought and is_reasoning_model(model_name):
@@ -146,7 +146,7 @@ async def anthropic_chat_completion_with_backoff(
# Temperature control not supported when using extended thinking
temperature = 1.0
formatted_messages, system_prompt = format_messages_for_anthropic(messages, system_prompt)
formatted_messages, system = format_messages_for_anthropic(messages, system_prompt)
aggregated_response = ""
response_started = False
@@ -156,7 +156,7 @@ async def anthropic_chat_completion_with_backoff(
messages=formatted_messages,
model=model_name, # type: ignore
temperature=temperature,
system=system_prompt,
system=system,
timeout=20,
max_tokens=max_tokens,
**model_kwargs,

View File

@@ -102,7 +102,7 @@ def gemini_completion_with_backoff(
client = get_gemini_client(api_key, api_base_url)
gemini_clients[api_key] = client
formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt)
formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
# format model response schema
response_schema = None
@@ -115,7 +115,7 @@ def gemini_completion_with_backoff(
seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
config = gtypes.GenerateContentConfig(
system_instruction=system_prompt,
system_instruction=system_instruction,
temperature=temperature,
thinking_config=thinking_config,
max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
@@ -184,7 +184,7 @@ async def gemini_chat_completion_with_backoff(
client = get_gemini_client(api_key, api_base_url)
gemini_clients[api_key] = client
formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt)
formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
thinking_config = None
if deepthought and model_name.startswith("gemini-2-5"):
@@ -192,7 +192,7 @@ async def gemini_chat_completion_with_backoff(
seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
config = gtypes.GenerateContentConfig(
system_instruction=system_prompt,
system_instruction=system_instruction,
temperature=temperature,
thinking_config=thinking_config,
max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,