Drop invalid messages in reverse order to continue interrupted chats

Previously: (1) messages with invalid content were dropped while iterating in normal (forward) order, which changed the index of the items still being iterated for Gemini and Anthropic models; (2) messages with empty content weren't dropped at all for OpenAI-compatible API models. While the OpenAI API is resilient to this, it's better to drop these invalid messages, as other OpenAI-compatible APIs may not handle them. We see messages with empty or no content when a chat gets interrupted by disconnections, interrupt messages or explicit aborts by the user. This change should now drop invalid messages without messing up the formatting of the other messages in a conversation. It should allow continuing interrupted conversations with any AI model.
2026-04-20 01:24:31 +00:00 · 2025-07-10 21:55:40 -07:00
parent f1a3ddf2ca
commit c8ec29551f
3 changed files with 20 additions and 4 deletions
--- a/src/khoj/processor/conversation/anthropic/utils.py
+++ b/src/khoj/processor/conversation/anthropic/utils.py
@@ -299,7 +299,7 @@ def format_messages_for_anthropic(raw_messages: list[ChatMessage], system_prompt
    if len(messages) == 1 and message_type != "tool_call":
        messages[0].role = "user"

-    for message in messages:
+    for message in reversed(messages):  # Process in reverse to not mess up iterator when drop invalid messages
        # Handle tool call and tool result message types from additional_kwargs
        message_type = message.additional_kwargs.get("message_type")
        if message_type == "tool_call":
--- a/src/khoj/processor/conversation/google/utils.py
+++ b/src/khoj/processor/conversation/google/utils.py
@@ -380,7 +380,7 @@ def format_messages_for_gemini(
            messages.remove(message)
    system_prompt = None if is_none_or_empty(system_prompt) else system_prompt

-    for message in messages:
+    for message in reversed(messages):  # Process in reverse to not mess up iterator when drop invalid messages
        if message.role == "assistant":
            message.role = "model"

--- a/src/khoj/processor/conversation/openai/utils.py
+++ b/src/khoj/processor/conversation/openai/utils.py
@@ -359,12 +359,13 @@ def get_structured_output_support(model_name: str, api_base_url: str = None) ->
    return StructuredOutputSupport.TOOL


-def format_message_for_api(messages: List[ChatMessage], api_base_url: str) -> List[dict]:
+def format_message_for_api(raw_messages: List[ChatMessage], api_base_url: str) -> List[dict]:
    """
    Format messages to send to chat model served over OpenAI (compatible) API.
    """
    formatted_messages = []
-    for message in deepcopy(messages):
+    messages = deepcopy(raw_messages)
+    for message in reversed(messages):  # Process in reverse to not mess up iterator when drop invalid messages
        # Handle tool call and tool result message types
        message_type = message.additional_kwargs.get("message_type")
        if message_type == "tool_call":
@@ -425,6 +426,21 @@ def format_message_for_api(messages: List[ChatMessage], api_base_url: str) -> Li
                    message.content += [{"type": "text", "text": assistant_texts_str}]
                else:
                    message.content = assistant_texts_str
+        elif isinstance(message.content, list):
+            # Drop invalid content parts
+            for part in reversed(message.content):
+                if part["type"] == "text" and not part.get("text"):
+                    message.content.remove(part)
+                elif part["type"] == "image_url" and not part.get("image_url"):
+                    message.content.remove(part)
+            # If no valid content parts left, remove the message
+            if is_none_or_empty(message.content):
+                messages.remove(message)
+                continue
+        elif isinstance(message.content, str) and not message.content.strip():
+            # If content is empty string, remove the message
+            messages.remove(message)
+            continue
        formatted_messages.append({"role": message.role, "content": message.content})

    return formatted_messages