mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Do not overwrite system_prompt, so that retried AI API calls stay idempotent
Previously, on a tenacity retry, the system_prompt could get overwritten.
This commit is contained in:
@@ -60,7 +60,7 @@ def anthropic_completion_with_backoff(
|
||||
client = get_anthropic_client(api_key, api_base_url)
|
||||
anthropic_clients[api_key] = client
|
||||
|
||||
formatted_messages, system_prompt = format_messages_for_anthropic(messages, system_prompt)
|
||||
formatted_messages, system = format_messages_for_anthropic(messages, system_prompt)
|
||||
|
||||
aggregated_response = ""
|
||||
if response_type == "json_object" and not deepthought:
|
||||
@@ -70,8 +70,8 @@ def anthropic_completion_with_backoff(
|
||||
|
||||
final_message = None
|
||||
model_kwargs = model_kwargs or dict()
|
||||
if system_prompt:
|
||||
model_kwargs["system"] = system_prompt
|
||||
if system:
|
||||
model_kwargs["system"] = system
|
||||
|
||||
max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
|
||||
if deepthought and is_reasoning_model(model_name):
|
||||
@@ -146,7 +146,7 @@ async def anthropic_chat_completion_with_backoff(
|
||||
# Temperature control not supported when using extended thinking
|
||||
temperature = 1.0
|
||||
|
||||
formatted_messages, system_prompt = format_messages_for_anthropic(messages, system_prompt)
|
||||
formatted_messages, system = format_messages_for_anthropic(messages, system_prompt)
|
||||
|
||||
aggregated_response = ""
|
||||
response_started = False
|
||||
@@ -156,7 +156,7 @@ async def anthropic_chat_completion_with_backoff(
|
||||
messages=formatted_messages,
|
||||
model=model_name, # type: ignore
|
||||
temperature=temperature,
|
||||
system=system_prompt,
|
||||
system=system,
|
||||
timeout=20,
|
||||
max_tokens=max_tokens,
|
||||
**model_kwargs,
|
||||
|
||||
@@ -102,7 +102,7 @@ def gemini_completion_with_backoff(
|
||||
client = get_gemini_client(api_key, api_base_url)
|
||||
gemini_clients[api_key] = client
|
||||
|
||||
formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt)
|
||||
formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
|
||||
|
||||
# format model response schema
|
||||
response_schema = None
|
||||
@@ -115,7 +115,7 @@ def gemini_completion_with_backoff(
|
||||
|
||||
seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
|
||||
config = gtypes.GenerateContentConfig(
|
||||
system_instruction=system_prompt,
|
||||
system_instruction=system_instruction,
|
||||
temperature=temperature,
|
||||
thinking_config=thinking_config,
|
||||
max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
|
||||
@@ -184,7 +184,7 @@ async def gemini_chat_completion_with_backoff(
|
||||
client = get_gemini_client(api_key, api_base_url)
|
||||
gemini_clients[api_key] = client
|
||||
|
||||
formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt)
|
||||
formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
|
||||
|
||||
thinking_config = None
|
||||
if deepthought and model_name.startswith("gemini-2-5"):
|
||||
@@ -192,7 +192,7 @@ async def gemini_chat_completion_with_backoff(
|
||||
|
||||
seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
|
||||
config = gtypes.GenerateContentConfig(
|
||||
system_instruction=system_prompt,
|
||||
system_instruction=system_instruction,
|
||||
temperature=temperature,
|
||||
thinking_config=thinking_config,
|
||||
max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
|
||||
|
||||
Reference in New Issue
Block a user