mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-02 13:18:18 +00:00
Set OpenAI API output tokens high by default to avoid hitting length limits
Explicitly set completion tokens high to avoid early termination issues, especially when trying to generate structured responses.
This commit is contained in:
@@ -55,6 +55,10 @@ logger = logging.getLogger(__name__)
|
||||
openai_clients: Dict[str, openai.OpenAI] = {}
|
||||
openai_async_clients: Dict[str, openai.AsyncOpenAI] = {}
|
||||
|
||||
# Default completion tokens
|
||||
# Reduce premature termination, especially when streaming structured responses
|
||||
MAX_COMPLETION_TOKENS = 16000
|
||||
|
||||
|
||||
def _extract_text_for_instructions(content: Union[str, List, Dict, None]) -> str:
|
||||
"""Extract plain text from a message content suitable for Responses API instructions."""
|
||||
@@ -111,6 +115,7 @@ def completion_with_backoff(
|
||||
|
||||
model_kwargs["temperature"] = temperature
|
||||
model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
|
||||
model_kwargs["max_completion_tokens"] = model_kwargs.get("max_completion_tokens", MAX_COMPLETION_TOKENS)
|
||||
|
||||
formatted_messages = format_message_for_api(messages, model_name, api_base_url)
|
||||
|
||||
@@ -303,6 +308,7 @@ async def chat_completion_with_backoff(
|
||||
model_kwargs.pop("stream_options", None)
|
||||
|
||||
model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
|
||||
model_kwargs["max_completion_tokens"] = model_kwargs.get("max_completion_tokens", MAX_COMPLETION_TOKENS)
|
||||
|
||||
formatted_messages = format_message_for_api(messages, model_name, api_base_url)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user