Do not overwrite system_prompt when retrying idempotent AI API calls

Previously, on a tenacity retry, the system_prompt could get overwritten.
This commit is contained in:
Debanjum
2025-05-30 15:43:25 -07:00
parent e0ea151f20
commit 0d1e6b0d53
2 changed files with 9 additions and 9 deletions

View File

@@ -60,7 +60,7 @@ def anthropic_completion_with_backoff(
client = get_anthropic_client(api_key, api_base_url)
anthropic_clients[api_key] = client
formatted_messages, system_prompt = format_messages_for_anthropic(messages, system_prompt)
formatted_messages, system = format_messages_for_anthropic(messages, system_prompt)
aggregated_response = ""
if response_type == "json_object" and not deepthought:
@@ -70,8 +70,8 @@ def anthropic_completion_with_backoff(
final_message = None
model_kwargs = model_kwargs or dict()
if system_prompt:
model_kwargs["system"] = system_prompt
if system:
model_kwargs["system"] = system
max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
if deepthought and is_reasoning_model(model_name):
@@ -146,7 +146,7 @@ async def anthropic_chat_completion_with_backoff(
# Temperature control not supported when using extended thinking
temperature = 1.0
formatted_messages, system_prompt = format_messages_for_anthropic(messages, system_prompt)
formatted_messages, system = format_messages_for_anthropic(messages, system_prompt)
aggregated_response = ""
response_started = False
@@ -156,7 +156,7 @@ async def anthropic_chat_completion_with_backoff(
messages=formatted_messages,
model=model_name, # type: ignore
temperature=temperature,
system=system_prompt,
system=system,
timeout=20,
max_tokens=max_tokens,
**model_kwargs,

View File

@@ -102,7 +102,7 @@ def gemini_completion_with_backoff(
client = get_gemini_client(api_key, api_base_url)
gemini_clients[api_key] = client
formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt)
formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
# format model response schema
response_schema = None
@@ -115,7 +115,7 @@ def gemini_completion_with_backoff(
seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
config = gtypes.GenerateContentConfig(
system_instruction=system_prompt,
system_instruction=system_instruction,
temperature=temperature,
thinking_config=thinking_config,
max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
@@ -184,7 +184,7 @@ async def gemini_chat_completion_with_backoff(
client = get_gemini_client(api_key, api_base_url)
gemini_clients[api_key] = client
formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt)
formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
thinking_config = None
if deepthought and model_name.startswith("gemini-2-5"):
@@ -192,7 +192,7 @@ async def gemini_chat_completion_with_backoff(
seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
config = gtypes.GenerateContentConfig(
system_instruction=system_prompt,
system_instruction=system_instruction,
temperature=temperature,
thinking_config=thinking_config,
max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,