mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-03 05:29:12 +00:00
Add support for text to speech and speech to text (#863)
- Add support for text to speech, speech to text. Add loading and responsive indicators to reflect state. - When streaming for speech to text, show incremental transcription in the message input field - When streaming text to speech, and a pause button in the chat message to allow user to stop playback
This commit is contained in:
@@ -10,9 +10,10 @@ import 'katex/dist/katex.min.css';
|
||||
|
||||
import { TeaserReferencesSection, constructAllReferences } from '../referencePanel/referencePanel';
|
||||
|
||||
import { ThumbsUp, ThumbsDown, Copy, Brain, Cloud, Folder, Book, Aperture, SpeakerHigh, MagnifyingGlass } from '@phosphor-icons/react';
|
||||
import { ThumbsUp, ThumbsDown, Copy, Brain, Cloud, Folder, Book, Aperture, SpeakerHigh, MagnifyingGlass, Pause } from '@phosphor-icons/react';
|
||||
|
||||
import * as DomPurify from 'dompurify';
|
||||
import { InlineLoading } from '../loading/loading';
|
||||
|
||||
const md = new markdownIt({
|
||||
html: true,
|
||||
@@ -206,8 +207,16 @@ export default function ChatMessage(props: ChatMessageProps) {
|
||||
const [copySuccess, setCopySuccess] = useState<boolean>(false);
|
||||
const [isHovering, setIsHovering] = useState<boolean>(false);
|
||||
const [markdownRendered, setMarkdownRendered] = useState<string>('');
|
||||
const [isPlaying, setIsPlaying] = useState<boolean>(false);
|
||||
const [interrupted, setInterrupted] = useState<boolean>(false);
|
||||
|
||||
const interruptedRef = useRef<boolean>(false);
|
||||
const messageRef = useRef<HTMLDivElement>(null);
|
||||
|
||||
useEffect(() => {
|
||||
interruptedRef.current = interrupted;
|
||||
}, [interrupted]);
|
||||
|
||||
useEffect(() => {
|
||||
let message = props.chatMessage.message;
|
||||
|
||||
@@ -278,8 +287,8 @@ export default function ChatMessage(props: ChatMessageProps) {
|
||||
function formatDate(timestamp: string) {
|
||||
// Format date in HH:MM, DD MMM YYYY format
|
||||
let date = new Date(timestamp + "Z");
|
||||
let time_string = date.toLocaleTimeString('en-IN', { hour: '2-digit', minute: '2-digit', hour12: true }).toUpperCase();
|
||||
let date_string = date.toLocaleString('en-IN', { year: 'numeric', month: 'short', day: '2-digit'}).replaceAll('-', ' ');
|
||||
let time_string = date.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: true }).toUpperCase();
|
||||
let date_string = date.toLocaleString('en-US', { year: 'numeric', month: 'short', day: '2-digit' }).replaceAll('-', ' ');
|
||||
return `${time_string} on ${date_string}`;
|
||||
}
|
||||
|
||||
@@ -330,6 +339,79 @@ export default function ChatMessage(props: ChatMessageProps) {
|
||||
return classes.join(' ');
|
||||
}
|
||||
|
||||
async function playTextToSpeech() {
|
||||
// Browser native speech API
|
||||
// const utterance = new SpeechSynthesisUtterance(props.chatMessage.message);
|
||||
// speechSynthesis.speak(utterance);
|
||||
|
||||
// Using the Khoj speech API
|
||||
// Break the message up into chunks of sentences
|
||||
const sentenceRegex = /[^.!?]+[.!?]*/g;
|
||||
const chunks = props.chatMessage.message.match(sentenceRegex) || [];
|
||||
|
||||
if (!chunks) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (chunks.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!chunks[0]) {
|
||||
return;
|
||||
}
|
||||
setIsPlaying(true);
|
||||
|
||||
let nextBlobPromise = fetchBlob(chunks[0]);
|
||||
|
||||
for (let i = 0; i < chunks.length; i++) {
|
||||
if (interruptedRef.current) {
|
||||
break; // Exit the loop if interrupted
|
||||
}
|
||||
|
||||
const currentBlobPromise = nextBlobPromise;
|
||||
if (i < chunks.length - 1) {
|
||||
nextBlobPromise = fetchBlob(chunks[i + 1]);
|
||||
}
|
||||
|
||||
try {
|
||||
const blob = await currentBlobPromise;
|
||||
const url = URL.createObjectURL(blob);
|
||||
await playAudio(url);
|
||||
} catch (error) {
|
||||
console.error('Error:', error);
|
||||
break; // Exit the loop on error
|
||||
}
|
||||
}
|
||||
|
||||
setIsPlaying(false);
|
||||
setInterrupted(false); // Reset interrupted state after playback
|
||||
}
|
||||
|
||||
async function fetchBlob(text: string) {
|
||||
const response = await fetch(`/api/chat/speech?text=${encodeURIComponent(text)}`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json'
|
||||
},
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error('Network response was not ok');
|
||||
}
|
||||
|
||||
return await response.blob();
|
||||
}
|
||||
|
||||
function playAudio(url: string) {
|
||||
return new Promise((resolve, reject) => {
|
||||
const audio = new Audio(url);
|
||||
audio.onended = resolve;
|
||||
audio.onerror = reject;
|
||||
audio.play();
|
||||
});
|
||||
}
|
||||
|
||||
const allReferences = constructAllReferences(props.chatMessage.context, props.chatMessage.onlineContext);
|
||||
|
||||
return (
|
||||
@@ -349,7 +431,7 @@ export default function ChatMessage(props: ChatMessageProps) {
|
||||
</div>
|
||||
<div className={styles.chatFooter}>
|
||||
{
|
||||
(isHovering || props.isMobileWidth || props.isLastMessage) &&
|
||||
(isHovering || props.isMobileWidth || props.isLastMessage || isPlaying) &&
|
||||
(
|
||||
<>
|
||||
<div title={formatDate(props.chatMessage.created)} className={`text-gray-400 relative top-0 left-4`}>
|
||||
@@ -359,9 +441,17 @@ export default function ChatMessage(props: ChatMessageProps) {
|
||||
{
|
||||
(props.chatMessage.by === "khoj") &&
|
||||
(
|
||||
<button title="Speak" onClick={(event) => console.log("speaker")}>
|
||||
<SpeakerHigh alt="Speak Message" color='hsl(var(--muted-foreground))' />
|
||||
</button>
|
||||
isPlaying ?
|
||||
(
|
||||
interrupted ?
|
||||
<InlineLoading iconClassName='p-0' className='m-0' />
|
||||
: <button title="Pause Speech" onClick={(event) => setInterrupted(true)}>
|
||||
<Pause alt="Pause Message" color='hsl(var(--muted-foreground))' />
|
||||
</button>
|
||||
)
|
||||
: <button title="Speak" onClick={(event) => playTextToSpeech()}>
|
||||
<SpeakerHigh alt="Speak Message" color='hsl(var(--muted-foreground))' />
|
||||
</button>
|
||||
)
|
||||
}
|
||||
<button title="Copy" className={`${styles.copyButton}`} onClick={() => {
|
||||
|
||||
Reference in New Issue
Block a user