From 63675b32992c3f518daf827c13d0ac161e89dce4 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 22 Nov 2023 02:19:22 -0800 Subject: [PATCH] Speak to Khoj from the Desktop client - Use icons to style speech to text recording state --- .../desktop/assets/icons/microphone-solid.svg | 1 + .../desktop/assets/icons/stop-solid.svg | 37 ++++++++ src/interface/desktop/chat.html | 93 ++++++++++++++++++- 3 files changed, 126 insertions(+), 5 deletions(-) create mode 100644 src/interface/desktop/assets/icons/microphone-solid.svg create mode 100644 src/interface/desktop/assets/icons/stop-solid.svg diff --git a/src/interface/desktop/assets/icons/microphone-solid.svg b/src/interface/desktop/assets/icons/microphone-solid.svg new file mode 100644 index 00000000..3fc4b91d --- /dev/null +++ b/src/interface/desktop/assets/icons/microphone-solid.svg @@ -0,0 +1 @@ + diff --git a/src/interface/desktop/assets/icons/stop-solid.svg b/src/interface/desktop/assets/icons/stop-solid.svg new file mode 100644 index 00000000..a9aaba28 --- /dev/null +++ b/src/interface/desktop/assets/icons/stop-solid.svg @@ -0,0 +1,37 @@ + + + + + + + diff --git a/src/interface/desktop/chat.html b/src/interface/desktop/chat.html index 4997ef99..6c6d1ca1 100644 --- a/src/interface/desktop/chat.html +++ b/src/interface/desktop/chat.html @@ -377,6 +377,62 @@ chat(); } } + + let mediaRecorder; + async function speechToText() { + const speakButton = document.getElementById('speak-button'); + const speakButtonImg = document.getElementById('speak-button-img'); + const chatInput = document.getElementById('chat-input'); + + const hostURL = await window.hostURLAPI.getURL(); + let url = `${hostURL}/api/speak?client=desktop`; + const khojToken = await window.tokenAPI.getToken(); + const headers = { 'Authorization': `Bearer ${khojToken}` }; + + const sendToServer = (audioBlob) => { + const formData = new FormData(); + formData.append('file', audioBlob); + + fetch(url, { method: 'POST', body: formData, headers}) + .then(response => response.ok ? response.json() : Promise.reject(response)) + .then(data => { chatInput.value += data.text; }) + .catch(err => err.status == 422 ? console.error("Configure speech-to-text model on server.") : console.error("Failed to transcribe audio")); + }; + + const handleRecording = (stream) => { + const audioChunks = []; + const recordingConfig = { mimeType: 'audio/webm' }; + mediaRecorder = new MediaRecorder(stream, recordingConfig); + + mediaRecorder.addEventListener("dataavailable", function(event) { + if (event.data.size > 0) audioChunks.push(event.data); + }); + + mediaRecorder.addEventListener("stop", function() { + const audioBlob = new Blob(audioChunks, { type: 'audio/webm' }); + sendToServer(audioBlob); + }); + + mediaRecorder.start(); + speakButtonImg.src = './assets/icons/stop-solid.svg'; + speakButtonImg.alt = 'Stop Speaking'; + }; + + // Toggle recording + if (!mediaRecorder || mediaRecorder.state === 'inactive') { + navigator.mediaDevices + .getUserMedia({ audio: true }) + .then(handleRecording) + .catch((e) => { + console.error(e); + }); + } else if (mediaRecorder.state === 'recording') { + mediaRecorder.stop(); + speakButtonImg.src = './assets/icons/microphone-solid.svg'; + speakButtonImg.alt = 'Speak'; + } + } +
@@ -400,7 +456,12 @@ @@ -514,15 +575,17 @@ #chat-footer { padding: 0; + margin: 8px; display: grid; grid-template-columns: minmax(70px, 100%); grid-column-gap: 10px; grid-row-gap: 10px; } - #chat-footer > * { - padding: 15px; - border-radius: 5px; - border: 1px solid #475569; + #input-row { + display: grid; + grid-template-columns: auto 32px; + grid-column-gap: 10px; + grid-row-gap: 10px; background: #f9fafc } .option:hover { @@ -543,6 +606,26 @@ #chat-input:focus { outline: none !important; } + #speak-button { + background: var(--background-color); + border: none; + border-radius: 5px; + padding: 5px; + font-size: 14px; + font-weight: 300; + line-height: 1.5em; + cursor: pointer; + transition: background 0.3s ease-in-out; + } + #speak-button:hover { + background: var(--primary-hover); + } + #speak-button:active { + background: var(--primary-active); + } + #speak-button-img { + width: 24px; + } .option-enabled { box-shadow: 0 0 12px rgb(119, 156, 46);