diff --git a/src/interface/web/app/chat/layout.tsx b/src/interface/web/app/chat/layout.tsx index 1da0e7a7..341960f7 100644 --- a/src/interface/web/app/chat/layout.tsx +++ b/src/interface/web/app/chat/layout.tsx @@ -18,6 +18,7 @@ export default function RootLayout({ (null); const [showLoginPrompt, setShowLoginPrompt] = useState(false); + const [recording, setRecording] = useState(false); + const [mediaRecorder, setMediaRecorder] = useState(null); + const [progressValue, setProgressValue] = useState(0); useEffect(() => { @@ -195,6 +203,83 @@ export default function ChatInputArea(props: ChatInputProps) { return } + // Assuming this function is added within the same context as the provided excerpt + async function startRecordingAndTranscribe() { + try { + const microphone = await navigator.mediaDevices.getUserMedia({ audio: true }); + const mediaRecorder = new MediaRecorder(microphone, { mimeType: 'audio/webm' }); + + const audioChunks: Blob[] = []; + + mediaRecorder.ondataavailable = async (event) => { + audioChunks.push(event.data); + const audioBlob = new Blob(audioChunks, { type: 'audio/webm' }); + const formData = new FormData(); + formData.append('file', audioBlob); + + // Send the incremental audio blob to the server + try { + const response = await fetch('/api/transcribe', { + method: 'POST', + body: formData, + }); + + if (!response.ok) { + throw new Error('Network response was not ok'); + } + + const transcription = await response.json(); + setMessage(transcription.text.trim()); + } catch (error) { + console.error('Error sending audio to server:', error); + } + }; + + // Send an audio blob every 1.5 seconds + mediaRecorder.start(1500); + + mediaRecorder.onstop = async () => { + const audioBlob = new Blob(audioChunks, { type: 'audio/webm' }); + const formData = new FormData(); + formData.append('file', audioBlob); + + // Send the audio blob to the server + try { + const response = await fetch('/api/transcribe', { + method: 'POST', + body: formData, + }); + + if (!response.ok) { + throw new Error('Network response was not ok'); + } + + const transcription = await response.json(); + mediaRecorder.stream.getTracks().forEach(track => track.stop()); + setMediaRecorder(null); + setMessage(transcription.text.trim()); + } catch (error) { + console.error('Error sending audio to server:', error); + } + }; + + setMediaRecorder(mediaRecorder); + } catch (error) { + console.error("Error getting microphone", error); + } + } + + useEffect(() => { + if (!recording && mediaRecorder) { + mediaRecorder.stop(); + } + + if (recording && !mediaRecorder) { + startRecordingAndTranscribe(); + } + + }, [recording]); + return ( <> { @@ -321,21 +406,58 @@ export default function ChatInputArea(props: ChatInputProps) { } }} onChange={(e) => setMessage(e.target.value)} - disabled={props.sendDisabled} /> + disabled={props.sendDisabled || recording} /> - + { + recording ? + + + + + + + Click to stop recording and transcribe your voice. + + + + : + ( + mediaRecorder ? + + : + < TooltipProvider > + + + + + + Click to start recording and transcribe your voice. + + + + ) + } - + ) } diff --git a/src/interface/web/app/components/chatMessage/chatMessage.tsx b/src/interface/web/app/components/chatMessage/chatMessage.tsx index e4779f73..84108020 100644 --- a/src/interface/web/app/components/chatMessage/chatMessage.tsx +++ b/src/interface/web/app/components/chatMessage/chatMessage.tsx @@ -10,9 +10,10 @@ import 'katex/dist/katex.min.css'; import { TeaserReferencesSection, constructAllReferences } from '../referencePanel/referencePanel'; -import { ThumbsUp, ThumbsDown, Copy, Brain, Cloud, Folder, Book, Aperture, SpeakerHigh, MagnifyingGlass } from '@phosphor-icons/react'; +import { ThumbsUp, ThumbsDown, Copy, Brain, Cloud, Folder, Book, Aperture, SpeakerHigh, MagnifyingGlass, Pause } from '@phosphor-icons/react'; import * as DomPurify from 'dompurify'; +import { InlineLoading } from '../loading/loading'; const md = new markdownIt({ html: true, @@ -206,8 +207,16 @@ export default function ChatMessage(props: ChatMessageProps) { const [copySuccess, setCopySuccess] = useState(false); const [isHovering, setIsHovering] = useState(false); const [markdownRendered, setMarkdownRendered] = useState(''); + const [isPlaying, setIsPlaying] = useState(false); + const [interrupted, setInterrupted] = useState(false); + + const interruptedRef = useRef(false); const messageRef = useRef(null); + useEffect(() => { + interruptedRef.current = interrupted; + }, [interrupted]); + useEffect(() => { let message = props.chatMessage.message; @@ -278,8 +287,8 @@ export default function ChatMessage(props: ChatMessageProps) { function formatDate(timestamp: string) { // Format date in HH:MM, DD MMM YYYY format let date = new Date(timestamp + "Z"); - let time_string = date.toLocaleTimeString('en-IN', { hour: '2-digit', minute: '2-digit', hour12: true }).toUpperCase(); - let date_string = date.toLocaleString('en-IN', { year: 'numeric', month: 'short', day: '2-digit'}).replaceAll('-', ' '); + let time_string = date.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: true }).toUpperCase(); + let date_string = date.toLocaleString('en-US', { year: 'numeric', month: 'short', day: '2-digit' }).replaceAll('-', ' '); return `${time_string} on ${date_string}`; } @@ -330,6 +339,79 @@ export default function ChatMessage(props: ChatMessageProps) { return classes.join(' '); } + async function playTextToSpeech() { + // Browser native speech API + // const utterance = new SpeechSynthesisUtterance(props.chatMessage.message); + // speechSynthesis.speak(utterance); + + // Using the Khoj speech API + // Break the message up into chunks of sentences + const sentenceRegex = /[^.!?]+[.!?]*/g; + const chunks = props.chatMessage.message.match(sentenceRegex) || []; + + if (!chunks) { + return; + } + + if (chunks.length === 0) { + return; + } + + if (!chunks[0]) { + return; + } + setIsPlaying(true); + + let nextBlobPromise = fetchBlob(chunks[0]); + + for (let i = 0; i < chunks.length; i++) { + if (interruptedRef.current) { + break; // Exit the loop if interrupted + } + + const currentBlobPromise = nextBlobPromise; + if (i < chunks.length - 1) { + nextBlobPromise = fetchBlob(chunks[i + 1]); + } + + try { + const blob = await currentBlobPromise; + const url = URL.createObjectURL(blob); + await playAudio(url); + } catch (error) { + console.error('Error:', error); + break; // Exit the loop on error + } + } + + setIsPlaying(false); + setInterrupted(false); // Reset interrupted state after playback + } + + async function fetchBlob(text: string) { + const response = await fetch(`/api/chat/speech?text=${encodeURIComponent(text)}`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + }); + + if (!response.ok) { + throw new Error('Network response was not ok'); + } + + return await response.blob(); + } + + function playAudio(url: string) { + return new Promise((resolve, reject) => { + const audio = new Audio(url); + audio.onended = resolve; + audio.onerror = reject; + audio.play(); + }); + } + const allReferences = constructAllReferences(props.chatMessage.context, props.chatMessage.onlineContext); return ( @@ -349,7 +431,7 @@ export default function ChatMessage(props: ChatMessageProps) {
{ - (isHovering || props.isMobileWidth || props.isLastMessage) && + (isHovering || props.isMobileWidth || props.isLastMessage || isPlaying) && ( <>
@@ -359,9 +441,17 @@ export default function ChatMessage(props: ChatMessageProps) { { (props.chatMessage.by === "khoj") && ( - + isPlaying ? + ( + interrupted ? + + : + ) + : ) } ) } diff --git a/src/interface/web/app/components/suggestions/suggestionCard.tsx b/src/interface/web/app/components/suggestions/suggestionCard.tsx index feb8c1bc..b2bff3e8 100644 --- a/src/interface/web/app/components/suggestions/suggestionCard.tsx +++ b/src/interface/web/app/components/suggestions/suggestionCard.tsx @@ -9,6 +9,7 @@ import { import styles from "./suggestions.module.css"; + import { getIconFromIconName } from "@/app/common/iconUtils"; diff --git a/src/interface/web/app/layout.tsx b/src/interface/web/app/layout.tsx index 57aaa673..1acd4a3f 100644 --- a/src/interface/web/app/layout.tsx +++ b/src/interface/web/app/layout.tsx @@ -18,6 +18,7 @@ export default function RootLayout({ diff --git a/src/interface/web/tailwind.config.ts b/src/interface/web/tailwind.config.ts index 25a10d1f..c83729f4 100644 --- a/src/interface/web/tailwind.config.ts +++ b/src/interface/web/tailwind.config.ts @@ -2,10 +2,6 @@ import type { Config } from "tailwindcss" const config = { safelist: [ - { - pattern: /to-(blue|yellow|green|pink|purple)-(50|100|200|950)/, - variants: ['dark'], - }, ], darkMode: ["class"], content: [ diff --git a/src/interface/web/yarn.lock b/src/interface/web/yarn.lock index 0d719685..2546d87b 100644 --- a/src/interface/web/yarn.lock +++ b/src/interface/web/yarn.lock @@ -4363,6 +4363,7 @@ string-argv@~0.3.2: integrity sha512-aqD2Q0144Z+/RqG52NeHEkZauTAUWJO8c6yTftGJKO3Tja5tUgIfmIl6kExvhtxSDP7fXB6DvzkfMpCd/F3G+Q== "string-width-cjs@npm:string-width@^4.2.0", string-width@^4.1.0: + name string-width-cjs version "4.2.3" resolved "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index c14017a6..43341bf5 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -222,10 +222,10 @@ async def transcribe( common: CommonQueryParams, file: UploadFile = File(...), rate_limiter_per_minute=Depends( - ApiUserRateLimiter(requests=1, subscribed_requests=10, window=60, slug="transcribe_minute") + ApiUserRateLimiter(requests=20, subscribed_requests=20, window=60, slug="transcribe_minute") ), rate_limiter_per_day=Depends( - ApiUserRateLimiter(requests=10, subscribed_requests=600, window=60 * 60 * 24, slug="transcribe_day") + ApiUserRateLimiter(requests=60, subscribed_requests=600, window=60 * 60 * 24, slug="transcribe_day") ), ): user: KhojUser = request.user.object diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 8a288b71..fa575622 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -155,10 +155,10 @@ async def text_to_speech( common: CommonQueryParams, text: str, rate_limiter_per_minute=Depends( - ApiUserRateLimiter(requests=5, subscribed_requests=20, window=60, slug="chat_minute") + ApiUserRateLimiter(requests=20, subscribed_requests=20, window=60, slug="chat_minute") ), rate_limiter_per_day=Depends( - ApiUserRateLimiter(requests=5, subscribed_requests=300, window=60 * 60 * 24, slug="chat_day") + ApiUserRateLimiter(requests=50, subscribed_requests=300, window=60 * 60 * 24, slug="chat_day") ), ) -> Response: voice_model = await ConversationAdapters.aget_voice_model_config(request.user.object) diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 9bff0dc6..277b049e 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -804,7 +804,7 @@ async def text_to_image( with timer("Improve the original user query", logger): if send_status_func: - await send_status_func("**✍🏽 Enhancing the Painting Prompt**") + await send_status_func("**Enhancing the Painting Prompt**") improved_image_prompt = await generate_better_image_prompt( message, chat_history,