From 093e276908071a71fc0d1b86ea1da52176fd803f Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 26 Jun 2024 15:29:09 +0530 Subject: [PATCH] Enable Voice chat in Khoj Obsidian plugin - Automatically carry out voice chats with Khoj from within Obsidian When you send a voice message, Khoj will automatically respond with voice as well - Listen to past Khoj messages as speech - Add circular loading spinner to use while message is being converted to speech --- src/interface/obsidian/src/chat_view.ts | 95 ++++++++++++++++++++++--- src/interface/obsidian/styles.css | 34 +++++++++ 2 files changed, 119 insertions(+), 10 deletions(-) diff --git a/src/interface/obsidian/src/chat_view.ts b/src/interface/obsidian/src/chat_view.ts index e4e9fd8d..8e0d4d6d 100644 --- a/src/interface/obsidian/src/chat_view.ts +++ b/src/interface/obsidian/src/chat_view.ts @@ -61,7 +61,7 @@ export class KhojChatView extends KhojPaneView { return "message-circle"; } - async chat() { + async chat(isVoice: boolean = false) { // Get text in chat input element let input_el = this.contentEl.getElementsByClassName("khoj-chat-input")[0]; @@ -72,7 +72,7 @@ export class KhojChatView extends KhojPaneView { this.autoResize(); // Get and render chat response to user message - await this.getChatResponse(user_message); + await this.getChatResponse(user_message, isVoice); } async onOpen() { @@ -294,6 +294,60 @@ export class KhojChatView extends KhojPaneView { return referenceButton; } + textToSpeech(message: string, event: MouseEvent | null = null): void { + // Replace the speaker with a loading icon. 
+ let loader = document.createElement("span"); + loader.classList.add("loader"); + + let speechButton: HTMLButtonElement; + let speechIcon: Element; + + if (event === null) { + // Pick the last speech button if none is provided + let speechButtons = document.getElementsByClassName("speech-button"); + speechButton = speechButtons[speechButtons.length - 1] as HTMLButtonElement; + + let speechIcons = document.getElementsByClassName("speech-icon"); + speechIcon = speechIcons[speechIcons.length - 1]; + } else { + speechButton = event.currentTarget as HTMLButtonElement; + speechIcon = event.target as Element; + } + + speechButton.innerHTML = ""; + speechButton.appendChild(loader); + speechButton.disabled = true; + + const context = new AudioContext(); + let textToSpeechApi = `${this.setting.khojUrl}/api/chat/speech?text=${encodeURIComponent(message)}`; + fetch(textToSpeechApi, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + "Authorization": `Bearer ${this.setting.khojApiKey}`, + }, + }) + .then(response => response.arrayBuffer()) + .then(arrayBuffer => context.decodeAudioData(arrayBuffer)) + .then(audioBuffer => { + const source = context.createBufferSource(); + source.buffer = audioBuffer; + source.connect(context.destination); + source.start(0); + source.onended = function() { + speechButton.innerHTML = ""; + speechButton.appendChild(speechIcon); + speechButton.disabled = false; + }; + }) + .catch(err => { + console.error("Error playing speech:", err); + speechButton.innerHTML = ""; + speechButton.appendChild(speechIcon); + speechButton.disabled = false; // Consider enabling the button again to allow retrying + }); + } + formatHTMLMessage(message: string, raw = false, willReplace = true) { // Remove any text between [INST] and tags. These are spurious instructions for some AI chat model. 
message = message.replace(/\[INST\].+(<\/s>)?/g, ''); @@ -465,7 +519,6 @@ export class KhojChatView extends KhojPaneView { copyButton.title = "Copy Message to Clipboard"; setIcon(copyButton, "copy-plus"); copyButton.addEventListener('click', createCopyParentText(message)); - chat_message_body_text_el.append(copyButton); // Add button to paste into current buffer let pasteToFile = this.contentEl.createEl('button'); @@ -473,7 +526,25 @@ export class KhojChatView extends KhojPaneView { pasteToFile.title = "Paste Message to File"; setIcon(pasteToFile, "clipboard-paste"); pasteToFile.addEventListener('click', (event) => { pasteTextAtCursor(createCopyParentText(message, 'clipboard-paste')(event)); }); - chat_message_body_text_el.append(pasteToFile); + + // Only enable the speech feature if the user is subscribed + let speechButton = null; + + if (this.setting.userInfo?.is_active) { + // Create a speech button icon to play the message out loud + speechButton = this.contentEl.createEl('button'); + speechButton.classList.add("chat-action-button", "speech-button"); + speechButton.title = "Listen to Message"; + setIcon(speechButton, "speech") + speechButton.addEventListener('click', (event) => this.textToSpeech(message, event)); + } + + // Append buttons to parent element + chat_message_body_text_el.append(copyButton, pasteToFile); + + if (speechButton) { + chat_message_body_text_el.append(speechButton); + } } formatDate(date: Date): string { @@ -727,7 +798,7 @@ export class KhojChatView extends KhojPaneView { return true; } - async readChatStream(response: Response, responseElement: HTMLDivElement): Promise { + async readChatStream(response: Response, responseElement: HTMLDivElement, isVoice: boolean = false): Promise { // Exit if response body is empty if (response.body == null) return; @@ -737,8 +808,12 @@ export class KhojChatView extends KhojPaneView { while (true) { const { value, done } = await reader.read(); - // Break if the stream is done - if (done) break; + if 
(done) { + // Automatically respond with voice if the subscribed user has sent voice message + if (isVoice && this.setting.userInfo?.is_active) this.textToSpeech(this.result); + // Break if the stream is done + break; + } let responseText = decoder.decode(value); if (responseText.includes("### compiled references:")) { @@ -756,7 +831,7 @@ export class KhojChatView extends KhojPaneView { } } - async getChatResponse(query: string | undefined | null): Promise { + async getChatResponse(query: string | undefined | null, isVoice: boolean = false): Promise { // Exit if query is empty if (!query || query === "") return; @@ -835,7 +910,7 @@ export class KhojChatView extends KhojPaneView { } } else { // Stream and render chat response - await this.readChatStream(response, responseElement); + await this.readChatStream(response, responseElement, isVoice); } } catch (err) { console.log(`Khoj chat response failed with\n${err}`); @@ -947,7 +1022,7 @@ export class KhojChatView extends KhojPaneView { sendImg.addEventListener('click', async (_) => { await this.chat() }); // Send message - this.chat(); + this.chat(true); }, 3000); }; diff --git a/src/interface/obsidian/styles.css b/src/interface/obsidian/styles.css index cc0bfb30..8e3d2c6b 100644 --- a/src/interface/obsidian/styles.css +++ b/src/interface/obsidian/styles.css @@ -507,6 +507,40 @@ img.copy-icon { height: 16px; } +/* Circular Loading Spinner */ +.loader { + width: 18px; + height: 18px; + border: 3px solid #FFF; + border-radius: 50%; + display: inline-block; + position: relative; + box-sizing: border-box; + animation: rotation 1s linear infinite; +} +.loader::after { + content: ''; + box-sizing: border-box; + position: absolute; + left: 50%; + top: 50%; + transform: translate(-50%, -50%); + width: 18px; + height: 18px; + border-radius: 50%; + border: 3px solid transparent; + border-bottom-color: var(--flower); +} + +@keyframes rotation { + 0% { + transform: rotate(0deg); + } + 100% { + transform: rotate(360deg); + } +} + 
/* Loading Spinner */ .lds-ellipsis { display: inline-block;