diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py index 6a644074..e68ebd20 100644 --- a/src/khoj/processor/conversation/openai/gpt.py +++ b/src/khoj/processor/conversation/openai/gpt.py @@ -85,10 +85,10 @@ async def converse_openai( program_execution_context: List[str] = None, location_data: LocationData = None, chat_history: list[ChatMessageModel] = [], - model: str = "gpt-4o-mini", + model: str = "gpt-4.1-mini", api_key: Optional[str] = None, api_base_url: Optional[str] = None, - temperature: float = 0.4, + temperature: float = 0.6, max_prompt_size=None, tokenizer_name=None, user_name: str = None, diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py index e5db2c45..f5a909d6 100644 --- a/src/khoj/processor/conversation/openai/utils.py +++ b/src/khoj/processor/conversation/openai/utils.py @@ -71,7 +71,7 @@ openai_async_clients: Dict[str, openai.AsyncOpenAI] = {} def completion_with_backoff( messages: List[ChatMessage], model_name: str, - temperature=0.8, + temperature=0.6, openai_api_key=None, api_base_url=None, deepthought: bool = False, @@ -89,14 +89,19 @@ def completion_with_backoff( if stream: model_kwargs["stream_options"] = {"include_usage": True} + model_kwargs["temperature"] = temperature + model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95) + formatted_messages = format_message_for_api(messages, api_base_url) # Tune reasoning models arguments if is_openai_reasoning_model(model_name, api_base_url): - temperature = 1 + model_kwargs["temperature"] = 1 reasoning_effort = "medium" if deepthought else "low" model_kwargs["reasoning_effort"] = reasoning_effort + model_kwargs.pop("top_p", None) elif is_twitter_reasoning_model(model_name, api_base_url): + model_kwargs.pop("temperature", None) reasoning_effort = "high" if deepthought else "low" model_kwargs["reasoning_effort"] = reasoning_effort elif model_name.startswith("deepseek-reasoner"): @@ -131,7 +136,6 @@ def completion_with_backoff( with client.beta.chat.completions.stream( messages=formatted_messages, # type: ignore model=model_name, - temperature=temperature, timeout=httpx.Timeout(30, read=read_timeout), **model_kwargs, ) as chat: @@ -233,9 +237,7 @@ async def chat_completion_with_backoff( openai_api_key=None, api_base_url=None, deepthought=False, - model_kwargs: dict = {}, tracer: dict = {}, - tools=None, ) -> AsyncGenerator[ResponseWithThought, None]: client_key = f"{openai_api_key}--{api_base_url}" client = openai_async_clients.get(client_key) @@ -243,6 +245,7 @@ async def chat_completion_with_backoff( client = get_openai_async_client(openai_api_key, api_base_url) openai_async_clients[client_key] = client + model_kwargs: dict = {} stream = not is_non_streaming_model(model_name, api_base_url) stream_processor = astream_thought_processor if stream: @@ -250,6 +253,8 @@ async def chat_completion_with_backoff( else: model_kwargs.pop("stream_options", None) + model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95) + formatted_messages = format_message_for_api(messages, api_base_url) # Configure thinking for openai reasoning models @@ -257,7 +262,9 @@ async def chat_completion_with_backoff( temperature = 1 reasoning_effort = "medium" if deepthought else "low" model_kwargs["reasoning_effort"] = reasoning_effort - model_kwargs.pop("stop", None) # Remove unsupported stop param for reasoning models + # Remove unsupported params for reasoning models + model_kwargs.pop("top_p", None) + model_kwargs.pop("stop", None) # Get the first system message and add the string `Formatting re-enabled` to it. # See https://platform.openai.com/docs/guides/reasoning-best-practices @@ -304,8 +311,6 @@ async def chat_completion_with_backoff( read_timeout = 300 if is_local_api(api_base_url) else 60 if os.getenv("KHOJ_LLM_SEED"): model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED")) - if tools: - model_kwargs["tools"] = tools aggregated_response = "" final_chunk = None