Set OpenAI API output tokens high by default to avoid hitting length limits

Explicitly set completion tokens high to avoid early termination
issues, especially when trying to generate structured responses.
This commit is contained in:
Debanjum
2025-12-04 21:53:10 -08:00
parent 5b6dab1627
commit bdf9afa726

View File

@@ -55,6 +55,10 @@ logger = logging.getLogger(__name__)
openai_clients: Dict[str, openai.OpenAI] = {}
openai_async_clients: Dict[str, openai.AsyncOpenAI] = {}
# Default completion tokens
# Reduce premature termination, especially when streaming structured responses
MAX_COMPLETION_TOKENS = 16000
def _extract_text_for_instructions(content: Union[str, List, Dict, None]) -> str:
"""Extract plain text from a message content suitable for Responses API instructions."""
@@ -111,6 +115,7 @@ def completion_with_backoff(
model_kwargs["temperature"] = temperature
model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
model_kwargs["max_completion_tokens"] = model_kwargs.get("max_completion_tokens", MAX_COMPLETION_TOKENS)
formatted_messages = format_message_for_api(messages, model_name, api_base_url)
@@ -303,6 +308,7 @@ async def chat_completion_with_backoff(
model_kwargs.pop("stream_options", None)
model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
model_kwargs["max_completion_tokens"] = model_kwargs.get("max_completion_tokens", MAX_COMPLETION_TOKENS)
formatted_messages = format_message_for_api(messages, model_name, api_base_url)