diff --git a/src/interface/desktop/chat.html b/src/interface/desktop/chat.html
index 92a11ebd..e039c6cb 100644
--- a/src/interface/desktop/chat.html
+++ b/src/interface/desktop/chat.html
@@ -181,7 +181,7 @@
function renderMessageWithReference(message, by, context=null, dt=null, onlineContext=null, intentType=null) {
if (intentType === "text-to-image") {
- let imageMarkdown = `![](${message})`;
+ let imageMarkdown = `![](data:image/png;base64,${message})`;
renderMessage(imageMarkdown, by, dt);
return;
}
@@ -254,20 +254,11 @@
md.renderer.rules.image = function(tokens, idx, options, env, self) {
let token = tokens[idx];
- // Get image source url. Only render images with src links
- let srcIndex = token.attrIndex('src');
- if (srcIndex < 0) { return ''; }
- let src = token.attrs[srcIndex][1];
-
- // Wrap the image in a link
- var aStart = `<a href="${src}" target="_blank">`;
- var aEnd = '</a>';
-
// Add class="text-to-image" to images
token.attrPush(['class', 'text-to-image']);
// Use the default renderer to render image markdown format
- return aStart + self.renderToken(tokens, idx, options) + aEnd;
+ return self.renderToken(tokens, idx, options);
};
// Render markdown
@@ -435,8 +426,8 @@
if (chunk.startsWith("{") && chunk.endsWith("}")) {
try {
const responseAsJson = JSON.parse(chunk);
- if (responseAsJson.imageUrl) {
- rawResponse += `![](${responseAsJson.imageUrl})`;
+ if (responseAsJson.image) {
+ rawResponse += `![](data:image/png;base64,${responseAsJson.image})`;
}
if (responseAsJson.detail) {
rawResponse += responseAsJson.detail;
diff --git a/src/interface/obsidian/src/chat_modal.ts b/src/interface/obsidian/src/chat_modal.ts
index 9786e45a..145bae50 100644
--- a/src/interface/obsidian/src/chat_modal.ts
+++ b/src/interface/obsidian/src/chat_modal.ts
@@ -109,7 +109,7 @@ export class KhojChatModal extends Modal {
if (!message) {
return;
} else if (intentType === "text-to-image") {
- let imageMarkdown = `![](${message})`;
+ let imageMarkdown = `![](data:image/png;base64,${message})`;
this.renderMessage(chatEl, imageMarkdown, sender, dt);
return;
} else if (!context) {
@@ -317,8 +317,8 @@ export class KhojChatModal extends Modal {
if (responseText.startsWith("{") && responseText.endsWith("}")) {
try {
const responseAsJson = JSON.parse(responseText);
- if (responseAsJson.imageUrl) {
- responseText = `![](${responseAsJson.imageUrl})`;
+ if (responseAsJson.image) {
+ responseText = `![](data:image/png;base64,${responseAsJson.image})`;
} else if (responseAsJson.detail) {
responseText = responseAsJson.detail;
}
diff --git a/src/khoj/interface/web/chat.html b/src/khoj/interface/web/chat.html
index 39cb6e77..97fdbebb 100644
--- a/src/khoj/interface/web/chat.html
+++ b/src/khoj/interface/web/chat.html
@@ -190,7 +190,7 @@ To get started, just start typing below. You can also type / to see a list of co
function renderMessageWithReference(message, by, context=null, dt=null, onlineContext=null, intentType=null) {
if (intentType === "text-to-image") {
- let imageMarkdown = `![](${message})`;
+ let imageMarkdown = `![](data:image/png;base64,${message})`;
renderMessage(imageMarkdown, by, dt);
return;
}
@@ -263,20 +263,11 @@ To get started, just start typing below. You can also type / to see a list of co
md.renderer.rules.image = function(tokens, idx, options, env, self) {
let token = tokens[idx];
- // Get image source url. Only render images with src links
- let srcIndex = token.attrIndex('src');
- if (srcIndex < 0) { return ''; }
- let src = token.attrs[srcIndex][1];
-
- // Wrap the image in a link
- var aStart = `<a href="${src}" target="_blank">`;
- var aEnd = '</a>';
-
// Add class="text-to-image" to images
token.attrPush(['class', 'text-to-image']);
// Use the default renderer to render image markdown format
- return aStart + self.renderToken(tokens, idx, options) + aEnd;
+ return self.renderToken(tokens, idx, options);
};
// Render markdown
@@ -440,8 +431,8 @@ To get started, just start typing below. You can also type / to see a list of co
if (chunk.startsWith("{") && chunk.endsWith("}")) {
try {
const responseAsJson = JSON.parse(chunk);
- if (responseAsJson.imageUrl) {
- rawResponse += `![](${responseAsJson.imageUrl})`;
+ if (responseAsJson.image) {
+ rawResponse += `![](data:image/png;base64,${responseAsJson.image})`;
}
if (responseAsJson.detail) {
rawResponse += responseAsJson.detail;
diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index ae31c260..d53f023a 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -706,9 +706,9 @@ async def chat(
status_code=200,
)
elif conversation_command == ConversationCommand.Image:
- image_url, status_code = await text_to_image(q)
- await sync_to_async(save_to_conversation_log)(q, image_url, user, meta_log, intent_type="text-to-image")
- content_obj = {"imageUrl": image_url, "intentType": "text-to-image"}
+ image, status_code = await text_to_image(q)
+ await sync_to_async(save_to_conversation_log)(q, image, user, meta_log, intent_type="text-to-image")
+ content_obj = {"image": image, "intentType": "text-to-image"}
return Response(content=json.dumps(content_obj), media_type="application/json", status_code=status_code)
# Get the (streamed) chat response from the LLM of choice.
diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py
index 4e43289f..f34ae815 100644
--- a/src/khoj/routers/helpers.py
+++ b/src/khoj/routers/helpers.py
@@ -252,7 +252,7 @@ def generate_chat_response(
async def text_to_image(message: str) -> Tuple[Optional[str], int]:
status_code = 200
- image_url = None
+ image = None
# Send the audio data to the Whisper API
text_to_image_config = await ConversationAdapters.aget_text_to_image_model_config()
@@ -264,13 +264,13 @@ async def text_to_image(message: str) -> Tuple[Optional[str], int]:
client = openai.OpenAI(api_key=openai_chat_config.api_key)
text2image_model = text_to_image_config.model_name
try:
- response = client.images.generate(prompt=message, model=text2image_model)
- image_url = response.data[0].url
+ response = client.images.generate(prompt=message, model=text2image_model, response_format="b64_json")
+ image = response.data[0].b64_json
except openai.OpenAIError as e:
logger.error(f"Image Generation failed with {e.http_status}: {e.error}")
status_code = 500
- return image_url, status_code
+ return image, status_code
class ApiUserRateLimiter: