From c87fce5930f931529bb67c0e045078dcc296adfe Mon Sep 17 00:00:00 2001 From: sabaimran Date: Sun, 1 Dec 2024 18:35:31 -0800 Subject: [PATCH] Add a migration to use the new image storage format for past conversations - Added it to the Django migrations so that it auto-triggers when someone updates their server and starts it up again for the first time. This will require that they update their clients as well in order to view/consume image content. - Remove server-side references in the code that allow to parse the text-to-image intent as it will no longer be necessary, given the chat logs will be migrated --- .../components/chatMessage/chatMessage.tsx | 21 ----- src/khoj/database/admin.py | 13 +-- ...5_migrate_generated_assets_and_validate.py | 85 +++++++++++++++++++ .../conversation/anthropic/anthropic_chat.py | 2 +- .../conversation/google/gemini_chat.py | 2 +- .../conversation/offline/chat_model.py | 2 +- src/khoj/processor/conversation/utils.py | 5 +- src/khoj/processor/image/generate.py | 3 - 8 files changed, 92 insertions(+), 41 deletions(-) create mode 100644 src/khoj/database/migrations/0075_migrate_generated_assets_and_validate.py diff --git a/src/interface/web/app/components/chatMessage/chatMessage.tsx b/src/interface/web/app/components/chatMessage/chatMessage.tsx index e4e35952..49ce4c00 100644 --- a/src/interface/web/app/components/chatMessage/chatMessage.tsx +++ b/src/interface/web/app/components/chatMessage/chatMessage.tsx @@ -413,27 +413,6 @@ const ChatMessage = forwardRef((props, ref) => .replace(/\\\[/g, "LEFTBRACKET") .replace(/\\\]/g, "RIGHTBRACKET"); - const intentTypeHandlers = { - "text-to-image": (msg: string) => `![generated image](data:image/png;base64,${msg})`, - "text-to-image2": (msg: string) => `![generated image](${msg})`, - "text-to-image-v3": (msg: string) => - `![generated image](data:image/webp;base64,${msg})`, - excalidraw: (msg: string) => msg, - }; - - // Handle intent-specific rendering - if (props.chatMessage.intent) { - const { type, "inferred-queries": inferredQueries } = props.chatMessage.intent; - - if (type in intentTypeHandlers) { - message = intentTypeHandlers[type as keyof typeof intentTypeHandlers](message); - } - - if (type.includes("text-to-image") && inferredQueries?.length > 0) { - message += `\n\n${inferredQueries[0]}`; - } - } - // Replace file links with base64 data message = renderCodeGenImageInline(message, props.chatMessage.codeContext); diff --git a/src/khoj/database/admin.py b/src/khoj/database/admin.py index 906f2ffe..b71f1f81 100644 --- a/src/khoj/database/admin.py +++ b/src/khoj/database/admin.py @@ -286,17 +286,10 @@ class ConversationAdmin(admin.ModelAdmin): modified_log = conversation.conversation_log chat_log = modified_log.get("chat", []) for idx, log in enumerate(chat_log): - if ( - log["by"] == "khoj" - and log["intent"] - and log["intent"]["type"] - and ( - log["intent"]["type"] == ImageIntentType.TEXT_TO_IMAGE.value - or log["intent"]["type"] == ImageIntentType.TEXT_TO_IMAGE_V3.value - ) - ): - log["message"] = "inline image redacted for space" + if log["by"] == "khoj" and log["images"]: + log["images"] = ["inline image redacted for space"] chat_log[idx] = log + modified_log["chat"] = chat_log writer.writerow( diff --git a/src/khoj/database/migrations/0075_migrate_generated_assets_and_validate.py b/src/khoj/database/migrations/0075_migrate_generated_assets_and_validate.py new file mode 100644 index 00000000..40c74ebf --- /dev/null +++ b/src/khoj/database/migrations/0075_migrate_generated_assets_and_validate.py @@ -0,0 +1,85 @@ +# Made manually by sabaimran for use by Django 5.0.9 on 2024-12-01 16:59 + +from django.db import migrations, models + +# This script was written alongside when Pydantic validation was added to the Conversation conversation_log field. + + +def migrate_generated_assets(apps, schema_editor): + Conversation = apps.get_model("database", "Conversation") + + # Process conversations in chunks + for conversation in Conversation.objects.iterator(): + try: + meta_log = conversation.conversation_log + modified = False + + for chat in meta_log.get("chat", []): + intent_type = chat.get("intent", {}).get("type") + + if intent_type and chat["by"] == "khoj": + if intent_type and "text-to-image" in intent_type: + # Migrate the generated image to the new format + chat["images"] = [chat.get("message")] + chat["message"] = chat["intent"]["inferred-queries"][0] + modified = True + + if intent_type and "excalidraw" in intent_type: + # Migrate the generated excalidraw to the new format + chat["excalidrawDiagram"] = chat.get("message") + chat["message"] = chat["intent"]["inferred-queries"][0] + modified = True + + # Only save if changes were made + if modified: + conversation.conversation_log = meta_log + conversation.save() + + except Exception as e: + print(f"Error processing conversation {conversation.id}: {str(e)}") + continue + + +def reverse_migration(apps, schema_editor): + Conversation = apps.get_model("database", "Conversation") + + # Process conversations in chunks + for conversation in Conversation.objects.iterator(): + try: + meta_log = conversation.conversation_log + modified = False + + for chat in meta_log.get("chat", []): + intent_type = chat.get("intent", {}).get("type") + + if intent_type and chat["by"] == "khoj": + if intent_type and "text-to-image" in intent_type: + # Migrate the generated image back to the old format + chat["message"] = chat.get("images", [])[0] + chat.pop("images", None) + modified = True + + if intent_type and "excalidraw" in intent_type: + # Migrate the generated excalidraw back to the old format + chat["message"] = chat.get("excalidrawDiagram") + chat.pop("excalidrawDiagram", None) + modified = True + + # Only save if changes were made + if modified: + conversation.conversation_log = meta_log + conversation.save() + + except Exception as e: + print(f"Error processing conversation {conversation.id}: {str(e)}") + continue + + +class Migration(migrations.Migration): + dependencies = [ + ("database", "0074_alter_conversation_title"), + ] + + operations = [ + migrations.RunPython(migrate_generated_assets, reverse_migration), + ] diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py index 15f9fa17..e72146e5 100644 --- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py +++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py @@ -55,7 +55,7 @@ def extract_questions_anthropic( [ f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n' for chat in conversation_log.get("chat", [])[-4:] - if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type") + if chat["by"] == "khoj" ] ) diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index 14f28303..fc49e35f 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -56,7 +56,7 @@ def extract_questions_gemini( [ f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n' for chat in conversation_log.get("chat", [])[-4:] - if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type") + if chat["by"] == "khoj" ] ) diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py index 7db70bc1..d493dd30 100644 --- a/src/khoj/processor/conversation/offline/chat_model.py +++ b/src/khoj/processor/conversation/offline/chat_model.py @@ -69,7 +69,7 @@ def extract_questions_offline( if use_history: for chat in conversation_log.get("chat", [])[-4:]: - if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type"): + if chat["by"] == "khoj": chat_history += f"Q: {chat['intent']['query']}\n" chat_history += f"Khoj: {chat['message']}\n\n" diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index a24ef899..64d42716 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -154,9 +154,6 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A chat_history += f'{agent_name}: {{"queries": {chat["intent"].get("inferred-queries")}}}\n' chat_history += f"{agent_name}: {chat['message']}\n\n" - elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")): - chat_history += f"User: {chat['intent']['query']}\n" - chat_history += f"{agent_name}: [generated image redacted for space]\n" elif chat["by"] == "khoj" and chat.get("images"): chat_history += f"User: {chat['intent']['query']}\n" chat_history += f"{agent_name}: [generated image redacted for space]\n" @@ -320,7 +317,7 @@ def save_to_conversation_log( Saved Conversation Turn You ({user.username}): "{q}" -Khoj: "{inferred_queries if ("text-to-image" in intent_type) else chat_response}" +Khoj: "{chat_response}" """.strip() ) diff --git a/src/khoj/processor/image/generate.py b/src/khoj/processor/image/generate.py index 1bec7f41..e543ac7d 100644 --- a/src/khoj/processor/image/generate.py +++ b/src/khoj/processor/image/generate.py @@ -49,9 +49,6 @@ async def text_to_image( if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder"]: chat_history += f"Q: {chat['intent']['query']}\n" chat_history += f"A: {chat['message']}\n" - elif chat["by"] == "khoj" and "text-to-image" in chat["intent"].get("type"): - chat_history += f"Q: Prompt: {chat['intent']['query']}\n" - chat_history += f"A: Improved Prompt: {chat['intent']['inferred-queries'][0]}\n" elif chat["by"] == "khoj" and chat.get("images"): chat_history += f"Q: {chat['intent']['query']}\n" chat_history += f"A: Improved Prompt: {chat['intent']['inferred-queries'][0]}\n"