Add a migration to use the new image storage format for past conversations

- Added it to the Django migrations so that it auto-triggers when someone updates their server and starts it up again for the first time. This will require that they update their clients as well in order to view/consume image content.
- Remove the server-side code that parsed the text-to-image intent; it is no longer necessary because existing chat logs will be migrated to the new format
This commit is contained in:
sabaimran
2024-12-01 18:35:31 -08:00
parent 00f48dc1e8
commit c87fce5930
8 changed files with 92 additions and 41 deletions

View File

@@ -413,27 +413,6 @@ const ChatMessage = forwardRef<HTMLDivElement, ChatMessageProps>((props, ref) =>
.replace(/\\\[/g, "LEFTBRACKET")
.replace(/\\\]/g, "RIGHTBRACKET");
const intentTypeHandlers = {
"text-to-image": (msg: string) => `![generated image](data:image/png;base64,${msg})`,
"text-to-image2": (msg: string) => `![generated image](${msg})`,
"text-to-image-v3": (msg: string) =>
`![generated image](data:image/webp;base64,${msg})`,
excalidraw: (msg: string) => msg,
};
// Handle intent-specific rendering
if (props.chatMessage.intent) {
const { type, "inferred-queries": inferredQueries } = props.chatMessage.intent;
if (type in intentTypeHandlers) {
message = intentTypeHandlers[type as keyof typeof intentTypeHandlers](message);
}
if (type.includes("text-to-image") && inferredQueries?.length > 0) {
message += `\n\n${inferredQueries[0]}`;
}
}
// Replace file links with base64 data
message = renderCodeGenImageInline(message, props.chatMessage.codeContext);

View File

@@ -286,17 +286,10 @@ class ConversationAdmin(admin.ModelAdmin):
modified_log = conversation.conversation_log
chat_log = modified_log.get("chat", [])
for idx, log in enumerate(chat_log):
if (
log["by"] == "khoj"
and log["intent"]
and log["intent"]["type"]
and (
log["intent"]["type"] == ImageIntentType.TEXT_TO_IMAGE.value
or log["intent"]["type"] == ImageIntentType.TEXT_TO_IMAGE_V3.value
)
):
log["message"] = "inline image redacted for space"
if log["by"] == "khoj" and log["images"]:
log["images"] = ["inline image redacted for space"]
chat_log[idx] = log
modified_log["chat"] = chat_log
writer.writerow(

View File

@@ -0,0 +1,85 @@
# Made manually by sabaimran for use by Django 5.0.9 on 2024-12-01 16:59
from django.db import migrations, models
# This script was written alongside the addition of Pydantic validation to the Conversation conversation_log field.
def _migrate_chat_turn(chat):
    """Move one chat turn's generated asset out of "message" into its own field.

    For a "khoj" turn whose intent type contains "text-to-image", the current
    message is stored as the single entry of ``chat["images"]``. For a turn
    whose intent type contains "excalidraw", it is stored in
    ``chat["excalidrawDiagram"]``. In both cases the message is replaced by the
    first inferred query, falling back to the user's query and then to "".

    Returns True if the turn was changed, False otherwise.
    """
    if not isinstance(chat, dict) or chat.get("by") != "khoj":
        return False

    # "intent" can be missing or explicitly null; treat both as "no intent".
    intent = chat.get("intent") or {}
    if not isinstance(intent, dict):
        return False
    intent_type = intent.get("type")
    if not isinstance(intent_type, str):
        return False

    asset = chat.get("message")
    if not asset:
        # Nothing to move into the asset field.
        return False

    # Tolerate missing or empty inferred queries instead of raising and
    # thereby skipping every other turn in the same conversation.
    inferred_queries = intent.get("inferred-queries")
    if isinstance(inferred_queries, list) and inferred_queries:
        new_message = inferred_queries[0]
    else:
        new_message = intent.get("query") or ""

    # NOTE(review): the removed client handlers prefixed "text-to-image" and
    # "text-to-image-v3" messages with a data URI header before rendering.
    # The value is copied verbatim here; confirm that clients reading
    # "images" accept it in this form.
    if "text-to-image" in intent_type:
        # Skip turns that already carry images so existing assets are not
        # overwritten with the text message (keeps the migration idempotent).
        if chat.get("images"):
            return False
        chat["images"] = [asset]
        chat["message"] = new_message
        return True

    if "excalidraw" in intent_type:
        if chat.get("excalidrawDiagram"):
            return False
        chat["excalidrawDiagram"] = asset
        chat["message"] = new_message
        return True

    return False


def migrate_generated_assets(apps, schema_editor):
    """Forward data migration: store generated images and diagrams in dedicated fields.

    Args:
        apps: Historical app registry supplied by Django's RunPython.
        schema_editor: Supplied by RunPython; unused.

    Failures are reported per conversation and do not abort the migration.
    """
    Conversation = apps.get_model("database", "Conversation")

    # iterator() streams rows rather than caching the full queryset in memory.
    for conversation in Conversation.objects.iterator():
        try:
            meta_log = conversation.conversation_log or {}
            modified = False

            for chat in meta_log.get("chat") or []:
                if _migrate_chat_turn(chat):
                    modified = True

            # Only save if changes were made, and only write the column we touched.
            if modified:
                conversation.conversation_log = meta_log
                conversation.save(update_fields=["conversation_log"])
        except Exception as e:
            # Deliberate best-effort: one malformed conversation must not block the rest.
            print(f"Error processing conversation {conversation.id}: {str(e)}")
def _revert_chat_turn(chat):
    """Move one chat turn's generated asset from its dedicated field back into "message".

    For a "khoj" turn whose intent type contains "text-to-image", the first
    entry of ``chat["images"]`` becomes the message and the "images" key is
    removed. For a turn whose intent type contains "excalidraw", the value of
    ``chat["excalidrawDiagram"]`` becomes the message and that key is removed.
    Turns without the corresponding asset are left untouched.

    Returns True if the turn was changed, False otherwise.
    """
    if not isinstance(chat, dict) or chat.get("by") != "khoj":
        return False

    # "intent" can be missing or explicitly null; treat both as "no intent".
    intent = chat.get("intent") or {}
    if not isinstance(intent, dict):
        return False
    intent_type = intent.get("type")
    if not isinstance(intent_type, str):
        return False

    changed = False

    if "text-to-image" in intent_type:
        images = chat.get("images") or []
        # Without the guard, indexing an empty list raises and the whole
        # conversation would be skipped.
        if images:
            # The old format holds a single image in "message"; keep the first.
            chat["message"] = images[0]
            chat.pop("images", None)
            changed = True

    if "excalidraw" in intent_type:
        diagram = chat.get("excalidrawDiagram")
        # Never replace the message with None when there is no diagram to restore.
        if diagram is not None:
            chat["message"] = diagram
            chat.pop("excalidrawDiagram", None)
            changed = True

    return changed


def reverse_migration(apps, schema_editor):
    """Reverse data migration: put generated images and diagrams back into "message".

    Args:
        apps: Historical app registry supplied by Django's RunPython.
        schema_editor: Supplied by RunPython; unused.

    Failures are reported per conversation and do not abort the migration.
    """
    Conversation = apps.get_model("database", "Conversation")

    # iterator() streams rows rather than caching the full queryset in memory.
    for conversation in Conversation.objects.iterator():
        try:
            meta_log = conversation.conversation_log or {}
            modified = False

            for chat in meta_log.get("chat") or []:
                if _revert_chat_turn(chat):
                    modified = True

            # Only save if changes were made, and only write the column we touched.
            if modified:
                conversation.conversation_log = meta_log
                conversation.save(update_fields=["conversation_log"])
        except Exception as e:
            # Deliberate best-effort: one malformed conversation must not block the rest.
            print(f"Error processing conversation {conversation.id}: {str(e)}")
class Migration(migrations.Migration):
    """Data-only migration that runs the forward and reverse conversation-log conversions."""

    dependencies = [("database", "0074_alter_conversation_title")]

    operations = [
        migrations.RunPython(
            code=migrate_generated_assets,
            reverse_code=reverse_migration,
        ),
    ]

View File

@@ -55,7 +55,7 @@ def extract_questions_anthropic(
[
f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
for chat in conversation_log.get("chat", [])[-4:]
if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type")
if chat["by"] == "khoj"
]
)

View File

@@ -56,7 +56,7 @@ def extract_questions_gemini(
[
f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
for chat in conversation_log.get("chat", [])[-4:]
if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type")
if chat["by"] == "khoj"
]
)

View File

@@ -69,7 +69,7 @@ def extract_questions_offline(
if use_history:
for chat in conversation_log.get("chat", [])[-4:]:
if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type"):
if chat["by"] == "khoj":
chat_history += f"Q: {chat['intent']['query']}\n"
chat_history += f"Khoj: {chat['message']}\n\n"

View File

@@ -154,9 +154,6 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
chat_history += f'{agent_name}: {{"queries": {chat["intent"].get("inferred-queries")}}}\n'
chat_history += f"{agent_name}: {chat['message']}\n\n"
elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
chat_history += f"User: {chat['intent']['query']}\n"
chat_history += f"{agent_name}: [generated image redacted for space]\n"
elif chat["by"] == "khoj" and chat.get("images"):
chat_history += f"User: {chat['intent']['query']}\n"
chat_history += f"{agent_name}: [generated image redacted for space]\n"
@@ -320,7 +317,7 @@ def save_to_conversation_log(
Saved Conversation Turn
You ({user.username}): "{q}"
Khoj: "{inferred_queries if ("text-to-image" in intent_type) else chat_response}"
Khoj: "{chat_response}"
""".strip()
)

View File

@@ -49,9 +49,6 @@ async def text_to_image(
if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder"]:
chat_history += f"Q: {chat['intent']['query']}\n"
chat_history += f"A: {chat['message']}\n"
elif chat["by"] == "khoj" and "text-to-image" in chat["intent"].get("type"):
chat_history += f"Q: Prompt: {chat['intent']['query']}\n"
chat_history += f"A: Improved Prompt: {chat['intent']['inferred-queries'][0]}\n"
elif chat["by"] == "khoj" and chat.get("images"):
chat_history += f"Q: {chat['intent']['query']}\n"
chat_history += f"A: Improved Prompt: {chat['intent']['inferred-queries'][0]}\n"