diff --git a/src/khoj/processor/conversation/anthropic/utils.py b/src/khoj/processor/conversation/anthropic/utils.py index c466223e..6dc8c861 100644 --- a/src/khoj/processor/conversation/anthropic/utils.py +++ b/src/khoj/processor/conversation/anthropic/utils.py @@ -60,7 +60,7 @@ def anthropic_completion_with_backoff( client = get_anthropic_client(api_key, api_base_url) anthropic_clients[api_key] = client - formatted_messages, system_prompt = format_messages_for_anthropic(messages, system_prompt) + formatted_messages, system = format_messages_for_anthropic(messages, system_prompt) aggregated_response = "" if response_type == "json_object" and not deepthought: @@ -70,8 +70,8 @@ def anthropic_completion_with_backoff( final_message = None model_kwargs = model_kwargs or dict() - if system_prompt: - model_kwargs["system"] = system_prompt + if system: + model_kwargs["system"] = system max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC if deepthought and is_reasoning_model(model_name): @@ -146,7 +146,7 @@ async def anthropic_chat_completion_with_backoff( # Temperature control not supported when using extended thinking temperature = 1.0 - formatted_messages, system_prompt = format_messages_for_anthropic(messages, system_prompt) + formatted_messages, system = format_messages_for_anthropic(messages, system_prompt) aggregated_response = "" response_started = False @@ -156,7 +156,7 @@ async def anthropic_chat_completion_with_backoff( messages=formatted_messages, model=model_name, # type: ignore temperature=temperature, - system=system_prompt, + system=system, timeout=20, max_tokens=max_tokens, **model_kwargs, diff --git a/src/khoj/processor/conversation/google/utils.py b/src/khoj/processor/conversation/google/utils.py index a72b8b84..65cee030 100644 --- a/src/khoj/processor/conversation/google/utils.py +++ b/src/khoj/processor/conversation/google/utils.py @@ -102,7 +102,7 @@ def gemini_completion_with_backoff( client = get_gemini_client(api_key, api_base_url) gemini_clients[api_key] = client - formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt) + formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt) # format model response schema response_schema = None @@ -115,7 +115,7 @@ def gemini_completion_with_backoff( seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None config = gtypes.GenerateContentConfig( - system_instruction=system_prompt, + system_instruction=system_instruction, temperature=temperature, thinking_config=thinking_config, max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI, @@ -184,7 +184,7 @@ async def gemini_chat_completion_with_backoff( client = get_gemini_client(api_key, api_base_url) gemini_clients[api_key] = client - formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt) + formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt) thinking_config = None if deepthought and model_name.startswith("gemini-2-5"): @@ -192,7 +192,7 @@ async def gemini_chat_completion_with_backoff( seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None config = gtypes.GenerateContentConfig( - system_instruction=system_prompt, + system_instruction=system_instruction, temperature=temperature, thinking_config=thinking_config, max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,