Add a migration to use the new image storage format for past conversations

- Added it to the Django migrations so that it auto-triggers when someone updates their server and starts it up again for the first time. This will require that they update their clients as well in order to view/consume image content. - Remove server-side references in the code that allow to parse the text-to-image intent as it will no longer be necessary, given the chat logs will be migrated
2026-03-02 21:19:12 +00:00 · 2024-12-01 18:35:31 -08:00
parent 00f48dc1e8
commit c87fce5930
8 changed files with 92 additions and 41 deletions
--- a/src/interface/web/app/components/chatMessage/chatMessage.tsx
+++ b/src/interface/web/app/components/chatMessage/chatMessage.tsx
@@ -413,27 +413,6 @@ const ChatMessage = forwardRef<HTMLDivElement, ChatMessageProps>((props, ref) =>
            .replace(/\\\[/g, "LEFTBRACKET")
            .replace(/\\\]/g, "RIGHTBRACKET");

-        const intentTypeHandlers = {
-            "text-to-image": (msg: string) => `![generated image](data:image/png;base64,${msg})`,
-            "text-to-image2": (msg: string) => `![generated image](${msg})`,
-            "text-to-image-v3": (msg: string) =>
-                `![generated image](data:image/webp;base64,${msg})`,
-            excalidraw: (msg: string) => msg,
-        };
-
-        // Handle intent-specific rendering
-        if (props.chatMessage.intent) {
-            const { type, "inferred-queries": inferredQueries } = props.chatMessage.intent;
-
-            if (type in intentTypeHandlers) {
-                message = intentTypeHandlers[type as keyof typeof intentTypeHandlers](message);
-            }
-
-            if (type.includes("text-to-image") && inferredQueries?.length > 0) {
-                message += `\n\n${inferredQueries[0]}`;
-            }
-        }
-
        // Replace file links with base64 data
        message = renderCodeGenImageInline(message, props.chatMessage.codeContext);

--- a/src/khoj/database/admin.py
+++ b/src/khoj/database/admin.py
@@ -286,17 +286,10 @@ class ConversationAdmin(admin.ModelAdmin):
            modified_log = conversation.conversation_log
            chat_log = modified_log.get("chat", [])
            for idx, log in enumerate(chat_log):
-                if (
-                    log["by"] == "khoj"
-                    and log["intent"]
-                    and log["intent"]["type"]
-                    and (
-                        log["intent"]["type"] == ImageIntentType.TEXT_TO_IMAGE.value
-                        or log["intent"]["type"] == ImageIntentType.TEXT_TO_IMAGE_V3.value
-                    )
-                ):
-                    log["message"] = "inline image redacted for space"
+                if log["by"] == "khoj" and log["images"]:
+                    log["images"] = ["inline image redacted for space"]
                    chat_log[idx] = log
+
            modified_log["chat"] = chat_log

            writer.writerow(
--- a/src/khoj/database/migrations/0075_migrate_generated_assets_and_validate.py
+++ b/src/khoj/database/migrations/0075_migrate_generated_assets_and_validate.py
@@ -0,0 +1,85 @@
+# Made manually by sabaimran for use by Django 5.0.9 on 2024-12-01 16:59
+
+from django.db import migrations, models
+
+# This script was written alongside when Pydantic validation was added to the Conversation conversation_log field.
+
+
+def migrate_generated_assets(apps, schema_editor):
+    Conversation = apps.get_model("database", "Conversation")
+
+    # Process conversations in chunks
+    for conversation in Conversation.objects.iterator():
+        try:
+            meta_log = conversation.conversation_log
+            modified = False
+
+            for chat in meta_log.get("chat", []):
+                intent_type = chat.get("intent", {}).get("type")
+
+                if intent_type and chat["by"] == "khoj":
+                    if intent_type and "text-to-image" in intent_type:
+                        # Migrate the generated image to the new format
+                        chat["images"] = [chat.get("message")]
+                        chat["message"] = chat["intent"]["inferred-queries"][0]
+                        modified = True
+
+                    if intent_type and "excalidraw" in intent_type:
+                        # Migrate the generated excalidraw to the new format
+                        chat["excalidrawDiagram"] = chat.get("message")
+                        chat["message"] = chat["intent"]["inferred-queries"][0]
+                        modified = True
+
+            # Only save if changes were made
+            if modified:
+                conversation.conversation_log = meta_log
+                conversation.save()
+
+        except Exception as e:
+            print(f"Error processing conversation {conversation.id}: {str(e)}")
+            continue
+
+
+def reverse_migration(apps, schema_editor):
+    Conversation = apps.get_model("database", "Conversation")
+
+    # Process conversations in chunks
+    for conversation in Conversation.objects.iterator():
+        try:
+            meta_log = conversation.conversation_log
+            modified = False
+
+            for chat in meta_log.get("chat", []):
+                intent_type = chat.get("intent", {}).get("type")
+
+                if intent_type and chat["by"] == "khoj":
+                    if intent_type and "text-to-image" in intent_type:
+                        # Migrate the generated image back to the old format
+                        chat["message"] = chat.get("images", [])[0]
+                        chat.pop("images", None)
+                        modified = True
+
+                    if intent_type and "excalidraw" in intent_type:
+                        # Migrate the generated excalidraw back to the old format
+                        chat["message"] = chat.get("excalidrawDiagram")
+                        chat.pop("excalidrawDiagram", None)
+                        modified = True
+
+            # Only save if changes were made
+            if modified:
+                conversation.conversation_log = meta_log
+                conversation.save()
+
+        except Exception as e:
+            print(f"Error processing conversation {conversation.id}: {str(e)}")
+            continue
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("database", "0074_alter_conversation_title"),
+    ]
+
+    operations = [
+        migrations.RunPython(migrate_generated_assets, reverse_migration),
+    ]
--- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py
+++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py
@@ -55,7 +55,7 @@ def extract_questions_anthropic(
        [
            f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
            for chat in conversation_log.get("chat", [])[-4:]
-            if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type")
+            if chat["by"] == "khoj"
        ]
    )

--- a/src/khoj/processor/conversation/google/gemini_chat.py
+++ b/src/khoj/processor/conversation/google/gemini_chat.py
@@ -56,7 +56,7 @@ def extract_questions_gemini(
        [
            f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
            for chat in conversation_log.get("chat", [])[-4:]
-            if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type")
+            if chat["by"] == "khoj"
        ]
    )

--- a/src/khoj/processor/conversation/offline/chat_model.py
+++ b/src/khoj/processor/conversation/offline/chat_model.py
@@ -69,7 +69,7 @@ def extract_questions_offline(

    if use_history:
        for chat in conversation_log.get("chat", [])[-4:]:
-            if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type"):
+            if chat["by"] == "khoj":
                chat_history += f"Q: {chat['intent']['query']}\n"
                chat_history += f"Khoj: {chat['message']}\n\n"

--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -154,9 +154,6 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
                chat_history += f'{agent_name}: {{"queries": {chat["intent"].get("inferred-queries")}}}\n'

            chat_history += f"{agent_name}: {chat['message']}\n\n"
-        elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
-            chat_history += f"User: {chat['intent']['query']}\n"
-            chat_history += f"{agent_name}: [generated image redacted for space]\n"
        elif chat["by"] == "khoj" and chat.get("images"):
            chat_history += f"User: {chat['intent']['query']}\n"
            chat_history += f"{agent_name}: [generated image redacted for space]\n"
@@ -320,7 +317,7 @@ def save_to_conversation_log(
 Saved Conversation Turn
 You ({user.username}): "{q}"

-Khoj: "{inferred_queries if ("text-to-image" in intent_type) else chat_response}"
+Khoj: "{chat_response}"
 """.strip()
    )

--- a/src/khoj/processor/image/generate.py
+++ b/src/khoj/processor/image/generate.py
@@ -49,9 +49,6 @@ async def text_to_image(
        if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder"]:
            chat_history += f"Q: {chat['intent']['query']}\n"
            chat_history += f"A: {chat['message']}\n"
-        elif chat["by"] == "khoj" and "text-to-image" in chat["intent"].get("type"):
-            chat_history += f"Q: Prompt: {chat['intent']['query']}\n"
-            chat_history += f"A: Improved Prompt: {chat['intent']['inferred-queries'][0]}\n"
        elif chat["by"] == "khoj" and chat.get("images"):
            chat_history += f"Q: {chat['intent']['query']}\n"
            chat_history += f"A: Improved Prompt: {chat['intent']['inferred-queries'][0]}\n"