From 6e3f66c0f12df8b38763f89b5576b679a1e52191 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 4 Dec 2023 22:55:22 -0500 Subject: [PATCH] Use base64 encoded image instead of source URL for persistence The source URL returned by OpenAI would expire soon. This would make the chat sessions contain non-accessible images/messages if using OpenAI image URL. Get base64 encoded image from OpenAI and store directly in conversation logs. This resolves the image link expiring issue --- src/interface/desktop/chat.html | 17 ++++------------- src/interface/obsidian/src/chat_modal.ts | 6 +++--- src/khoj/interface/web/chat.html | 17 ++++------------- src/khoj/routers/api.py | 6 +++--- src/khoj/routers/helpers.py | 8 ++++---- 5 files changed, 18 insertions(+), 36 deletions(-) diff --git a/src/interface/desktop/chat.html b/src/interface/desktop/chat.html index 92a11ebd..e039c6cb 100644 --- a/src/interface/desktop/chat.html +++ b/src/interface/desktop/chat.html @@ -181,7 +181,7 @@ function renderMessageWithReference(message, by, context=null, dt=null, onlineContext=null, intentType=null) { if (intentType === "text-to-image") { - let imageMarkdown = `![](${message})`; + let imageMarkdown = `![](data:image/png;base64,${message})`; renderMessage(imageMarkdown, by, dt); return; } @@ -254,20 +254,11 @@ md.renderer.rules.image = function(tokens, idx, options, env, self) { let token = tokens[idx]; - // Get image source url. 
Only render images with src links - let srcIndex = token.attrIndex('src'); - if (srcIndex < 0) { return ''; } - let src = token.attrs[srcIndex][1]; - - // Wrap the image in a link - var aStart = ``; - var aEnd = ''; - // Add class="text-to-image" to images token.attrPush(['class', 'text-to-image']); // Use the default renderer to render image markdown format - return aStart + self.renderToken(tokens, idx, options) + aEnd; + return self.renderToken(tokens, idx, options); }; // Render markdown @@ -435,8 +426,8 @@ if (chunk.startsWith("{") && chunk.endsWith("}")) { try { const responseAsJson = JSON.parse(chunk); - if (responseAsJson.imageUrl) { - rawResponse += `![${query}](${responseAsJson.imageUrl})`; + if (responseAsJson.image) { + rawResponse += `![${query}](data:image/png;base64,${responseAsJson.image})`; } if (responseAsJson.detail) { rawResponse += responseAsJson.detail; diff --git a/src/interface/obsidian/src/chat_modal.ts b/src/interface/obsidian/src/chat_modal.ts index 9786e45a..145bae50 100644 --- a/src/interface/obsidian/src/chat_modal.ts +++ b/src/interface/obsidian/src/chat_modal.ts @@ -109,7 +109,7 @@ export class KhojChatModal extends Modal { if (!message) { return; } else if (intentType === "text-to-image") { - let imageMarkdown = `![](${message})`; + let imageMarkdown = `![](data:image/png;base64,${message})`; this.renderMessage(chatEl, imageMarkdown, sender, dt); return; } else if (!context) { @@ -317,8 +317,8 @@ export class KhojChatModal extends Modal { if (responseText.startsWith("{") && responseText.endsWith("}")) { try { const responseAsJson = JSON.parse(responseText); - if (responseAsJson.imageUrl) { - responseText = `![${query}](${responseAsJson.imageUrl})`; + if (responseAsJson.image) { + responseText = `![${query}](data:image/png;base64,${responseAsJson.image})`; } else if (responseAsJson.detail) { responseText = responseAsJson.detail; } diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html index 
39cb6e77..97fdbebb 100644 --- a/src/khoj/interface/web/chat.html +++ b/src/khoj/interface/web/chat.html @@ -190,7 +190,7 @@ To get started, just start typing below. You can also type / to see a list of co function renderMessageWithReference(message, by, context=null, dt=null, onlineContext=null, intentType=null) { if (intentType === "text-to-image") { - let imageMarkdown = `![](${message})`; + let imageMarkdown = `![](data:image/png;base64,${message})`; renderMessage(imageMarkdown, by, dt); return; } @@ -263,20 +263,11 @@ To get started, just start typing below. You can also type / to see a list of co md.renderer.rules.image = function(tokens, idx, options, env, self) { let token = tokens[idx]; - // Get image source url. Only render images with src links - let srcIndex = token.attrIndex('src'); - if (srcIndex < 0) { return ''; } - let src = token.attrs[srcIndex][1]; - - // Wrap the image in a link - var aStart = ``; - var aEnd = ''; - // Add class="text-to-image" to images token.attrPush(['class', 'text-to-image']); // Use the default renderer to render image markdown format - return aStart + self.renderToken(tokens, idx, options) + aEnd; + return self.renderToken(tokens, idx, options); }; // Render markdown @@ -440,8 +431,8 @@ To get started, just start typing below. 
You can also type / to see a list of co if (chunk.startsWith("{") && chunk.endsWith("}")) { try { const responseAsJson = JSON.parse(chunk); - if (responseAsJson.imageUrl) { - rawResponse += `![${query}](${responseAsJson.imageUrl})`; + if (responseAsJson.image) { + rawResponse += `![${query}](data:image/png;base64,${responseAsJson.image})`; } if (responseAsJson.detail) { rawResponse += responseAsJson.detail; diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index ae31c260..d53f023a 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -706,9 +706,9 @@ async def chat( status_code=200, ) elif conversation_command == ConversationCommand.Image: - image_url, status_code = await text_to_image(q) - await sync_to_async(save_to_conversation_log)(q, image_url, user, meta_log, intent_type="text-to-image") - content_obj = {"imageUrl": image_url, "intentType": "text-to-image"} + image, status_code = await text_to_image(q) + await sync_to_async(save_to_conversation_log)(q, image, user, meta_log, intent_type="text-to-image") + content_obj = {"image": image, "intentType": "text-to-image"} return Response(content=json.dumps(content_obj), media_type="application/json", status_code=status_code) # Get the (streamed) chat response from the LLM of choice. 
diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 4e43289f..f34ae815 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -252,7 +252,7 @@ def generate_chat_response( async def text_to_image(message: str) -> Tuple[Optional[str], int]: status_code = 200 - image_url = None + image = None # Send the audio data to the Whisper API text_to_image_config = await ConversationAdapters.aget_text_to_image_model_config() @@ -264,13 +264,13 @@ async def text_to_image(message: str) -> Tuple[Optional[str], int]: client = openai.OpenAI(api_key=openai_chat_config.api_key) text2image_model = text_to_image_config.model_name try: - response = client.images.generate(prompt=message, model=text2image_model) - image_url = response.data[0].url + response = client.images.generate(prompt=message, model=text2image_model, response_format="b64_json") + image = response.data[0].b64_json except openai.OpenAIError as e: logger.error(f"Image Generation failed with {e.http_status}: {e.error}") status_code = 500 - return image_url, status_code + return image, status_code class ApiUserRateLimiter: