diff --git a/src/khoj/processor/conversation/openai/utils.py b/src/khoj/processor/conversation/openai/utils.py
index 4f4bd358..3ebb2e62 100644
--- a/src/khoj/processor/conversation/openai/utils.py
+++ b/src/khoj/processor/conversation/openai/utils.py
@@ -55,6 +55,10 @@ logger = logging.getLogger(__name__)
 openai_clients: Dict[str, openai.OpenAI] = {}
 openai_async_clients: Dict[str, openai.AsyncOpenAI] = {}
 
+# Default cap on completion tokens, used when the caller does not set max_completion_tokens.
+# Reduces premature termination of responses, especially when streaming structured responses.
+MAX_COMPLETION_TOKENS = 16000
+
 
 def _extract_text_for_instructions(content: Union[str, List, Dict, None]) -> str:
     """Extract plain text from a message content suitable for Responses API instructions."""
@@ -111,6 +115,7 @@ def completion_with_backoff(
 
     model_kwargs["temperature"] = temperature
     model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
+    model_kwargs["max_completion_tokens"] = model_kwargs.get("max_completion_tokens", MAX_COMPLETION_TOKENS)
 
     formatted_messages = format_message_for_api(messages, model_name, api_base_url)
 
@@ -303,6 +308,7 @@ async def chat_completion_with_backoff(
         model_kwargs.pop("stream_options", None)
 
     model_kwargs["top_p"] = model_kwargs.get("top_p", 0.95)
+    model_kwargs["max_completion_tokens"] = model_kwargs.get("max_completion_tokens", MAX_COMPLETION_TOKENS)
 
     formatted_messages = format_message_for_api(messages, model_name, api_base_url)