mirror of
https://github.com/khoaliber/khoj.git
synced 2026-03-10 05:39:11 +00:00
Transcribe only when keyboard shortcut or button pressed in Obsidian
- Transcribe on holding Ctrl+s keyboard shortcut - Transcribe on holding the transcribe button pressed via mouse too - Make the transcribe button robust to inadvertent touches by using timeout - Do not transcribe, trigger auto-send on silences. Silence detection is super rudimentary, just blocks standard emanations by whisper when no speech
This commit is contained in:
@@ -24,13 +24,13 @@ export class KhojChatView extends KhojPaneView {
|
|||||||
setting: KhojSetting;
|
setting: KhojSetting;
|
||||||
waitingForLocation: boolean;
|
waitingForLocation: boolean;
|
||||||
location: Location;
|
location: Location;
|
||||||
|
keyPressTimeout: NodeJS.Timeout | null = null;
|
||||||
|
|
||||||
constructor(leaf: WorkspaceLeaf, setting: KhojSetting) {
|
constructor(leaf: WorkspaceLeaf, setting: KhojSetting) {
|
||||||
super(leaf, setting);
|
super(leaf, setting);
|
||||||
|
|
||||||
// Register Modal Keybindings to send voice message
|
// Register chat view keybindings
|
||||||
this.scope = new Scope(this.app.scope);
|
this.scope = new Scope(this.app.scope);
|
||||||
this.scope.register(["Mod"], 's', async (event) => { await this.speechToText(event); });
|
|
||||||
|
|
||||||
this.waitingForLocation = true;
|
this.waitingForLocation = true;
|
||||||
|
|
||||||
@@ -124,6 +124,10 @@ export class KhojChatView extends KhojPaneView {
|
|||||||
chatInput.addEventListener('input', (_) => { this.onChatInput() });
|
chatInput.addEventListener('input', (_) => { this.onChatInput() });
|
||||||
chatInput.addEventListener('keydown', (event) => { this.incrementalChat(event) });
|
chatInput.addEventListener('keydown', (event) => { this.incrementalChat(event) });
|
||||||
|
|
||||||
|
// Add event listeners for long press keybinding
|
||||||
|
this.contentEl.addEventListener('keydown', this.handleKeyDown.bind(this));
|
||||||
|
this.contentEl.addEventListener('keyup', this.handleKeyUp.bind(this));
|
||||||
|
|
||||||
let transcribe = inputRow.createEl("button", {
|
let transcribe = inputRow.createEl("button", {
|
||||||
text: "Transcribe",
|
text: "Transcribe",
|
||||||
attr: {
|
attr: {
|
||||||
@@ -131,7 +135,8 @@ export class KhojChatView extends KhojPaneView {
|
|||||||
class: "khoj-transcribe khoj-input-row-button clickable-icon ",
|
class: "khoj-transcribe khoj-input-row-button clickable-icon ",
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
transcribe.addEventListener('mousedown', async (event) => { await this.speechToText(event) });
|
transcribe.addEventListener('mousedown', (event) => { this.startSpeechToText(event) });
|
||||||
|
transcribe.addEventListener('mouseup', async (event) => { await this.stopSpeechToText(event) });
|
||||||
transcribe.addEventListener('touchstart', async (event) => { await this.speechToText(event) });
|
transcribe.addEventListener('touchstart', async (event) => { await this.speechToText(event) });
|
||||||
transcribe.addEventListener('touchend', async (event) => { await this.speechToText(event) });
|
transcribe.addEventListener('touchend', async (event) => { await this.speechToText(event) });
|
||||||
transcribe.addEventListener('touchcancel', async (event) => { await this.speechToText(event) });
|
transcribe.addEventListener('touchcancel', async (event) => { await this.speechToText(event) });
|
||||||
@@ -165,6 +170,46 @@ export class KhojChatView extends KhojPaneView {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
startSpeechToText(event: KeyboardEvent | MouseEvent | TouchEvent, timeout=200) {
|
||||||
|
if (!this.keyPressTimeout) {
|
||||||
|
this.keyPressTimeout = setTimeout(async () => {
|
||||||
|
// Reset auto send voice message timer, UI if running
|
||||||
|
if (this.sendMessageTimeout) {
|
||||||
|
// Stop the auto send voice message countdown timer UI
|
||||||
|
clearTimeout(this.sendMessageTimeout);
|
||||||
|
const sendButton = <HTMLButtonElement>this.contentEl.getElementsByClassName("khoj-chat-send")[0]
|
||||||
|
setIcon(sendButton, "arrow-up-circle")
|
||||||
|
let sendImg = <SVGElement>sendButton.getElementsByClassName("lucide-arrow-up-circle")[0]
|
||||||
|
sendImg.addEventListener('click', async (_) => { await this.chat() });
|
||||||
|
// Reset chat input value
|
||||||
|
const chatInput = <HTMLTextAreaElement>this.contentEl.getElementsByClassName("khoj-chat-input")[0];
|
||||||
|
chatInput.value = "";
|
||||||
|
}
|
||||||
|
// Start new voice message
|
||||||
|
await this.speechToText(event);
|
||||||
|
}, timeout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
async stopSpeechToText(event: KeyboardEvent | MouseEvent | TouchEvent) {
|
||||||
|
if (this.mediaRecorder) {
|
||||||
|
await this.speechToText(event);
|
||||||
|
}
|
||||||
|
if (this.keyPressTimeout) {
|
||||||
|
clearTimeout(this.keyPressTimeout);
|
||||||
|
this.keyPressTimeout = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
handleKeyDown(event: KeyboardEvent) {
|
||||||
|
// Start speech to text if keyboard shortcut is pressed
|
||||||
|
if (event.key === 's' && event.getModifierState('Control')) this.startSpeechToText(event);
|
||||||
|
}
|
||||||
|
|
||||||
|
async handleKeyUp(event: KeyboardEvent) {
|
||||||
|
// Stop speech to text if keyboard shortcut is released
|
||||||
|
if (event.key === 's' && event.getModifierState('Control')) await this.stopSpeechToText(event);
|
||||||
|
}
|
||||||
|
|
||||||
processOnlineReferences(referenceSection: HTMLElement, onlineContext: any) {
|
processOnlineReferences(referenceSection: HTMLElement, onlineContext: any) {
|
||||||
let numOnlineReferences = 0;
|
let numOnlineReferences = 0;
|
||||||
for (let subquery in onlineContext) {
|
for (let subquery in onlineContext) {
|
||||||
@@ -993,9 +1038,19 @@ export class KhojChatView extends KhojPaneView {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Parse response from Khoj backend
|
// Parse response from Khoj backend
|
||||||
|
let noSpeechText: string[] = [
|
||||||
|
"Thanks for watching!",
|
||||||
|
"Thanks for watching.",
|
||||||
|
"Thank you for watching!",
|
||||||
|
"Thank you for watching.",
|
||||||
|
"You",
|
||||||
|
"Bye."
|
||||||
|
];
|
||||||
|
let noSpeech: boolean = false;
|
||||||
if (response.status === 200) {
|
if (response.status === 200) {
|
||||||
console.log(response);
|
console.log(response);
|
||||||
chatInput.value += response.json.text.trimStart();
|
noSpeech = noSpeechText.includes(response.json.text.trimStart());
|
||||||
|
if (!noSpeech) chatInput.value += response.json.text.trimStart();
|
||||||
this.autoResize();
|
this.autoResize();
|
||||||
} else if (response.status === 501) {
|
} else if (response.status === 501) {
|
||||||
throw new Error("⛔️ Configure speech-to-text model on server.");
|
throw new Error("⛔️ Configure speech-to-text model on server.");
|
||||||
@@ -1005,8 +1060,8 @@ export class KhojChatView extends KhojPaneView {
|
|||||||
throw new Error("⛔️ Failed to transcribe audio.");
|
throw new Error("⛔️ Failed to transcribe audio.");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't auto-send empty messages
|
// Don't auto-send empty messages or when no speech is detected
|
||||||
if (chatInput.value.length === 0) return;
|
if (chatInput.value.length === 0 || noSpeech) return;
|
||||||
|
|
||||||
// Show stop auto-send button. It stops auto-send when clicked
|
// Show stop auto-send button. It stops auto-send when clicked
|
||||||
setIcon(sendButton, "stop-circle");
|
setIcon(sendButton, "stop-circle");
|
||||||
@@ -1044,19 +1099,19 @@ export class KhojChatView extends KhojPaneView {
|
|||||||
});
|
});
|
||||||
|
|
||||||
this.mediaRecorder.start();
|
this.mediaRecorder.start();
|
||||||
setIcon(transcribeButton, "mic-off");
|
// setIcon(transcribeButton, "mic-off");
|
||||||
transcribeButton.classList.add("loading-encircle")
|
transcribeButton.classList.add("loading-encircle")
|
||||||
};
|
};
|
||||||
|
|
||||||
// Toggle recording
|
// Toggle recording
|
||||||
if (!this.mediaRecorder || this.mediaRecorder.state === 'inactive' || event.type === 'touchstart') {
|
if (!this.mediaRecorder || this.mediaRecorder.state === 'inactive' || event.type === 'touchstart' || event.type === 'mousedown' || event.type === 'keydown') {
|
||||||
navigator.mediaDevices
|
navigator.mediaDevices
|
||||||
.getUserMedia({ audio: true })
|
.getUserMedia({ audio: true })
|
||||||
?.then(handleRecording)
|
?.then(handleRecording)
|
||||||
.catch((e) => {
|
.catch((e) => {
|
||||||
this.flashStatusInChatInput("⛔️ Failed to access microphone");
|
this.flashStatusInChatInput("⛔️ Failed to access microphone");
|
||||||
});
|
});
|
||||||
} else if (this.mediaRecorder.state === 'recording' || event.type === 'touchend' || event.type === 'touchcancel') {
|
} else if (this.mediaRecorder?.state === 'recording' || event.type === 'touchend' || event.type === 'touchcancel' || event.type === 'mouseup' || event.type === 'keyup') {
|
||||||
this.mediaRecorder.stop();
|
this.mediaRecorder.stop();
|
||||||
this.mediaRecorder.stream.getTracks().forEach(track => track.stop());
|
this.mediaRecorder.stream.getTracks().forEach(track => track.stop());
|
||||||
this.mediaRecorder = undefined;
|
this.mediaRecorder = undefined;
|
||||||
|
|||||||
@@ -613,9 +613,23 @@ img.copy-icon {
|
|||||||
margin-top: -16px;
|
margin-top: -16px;
|
||||||
margin-left: -16px;
|
margin-left: -16px;
|
||||||
border: 4px solid transparent;
|
border: 4px solid transparent;
|
||||||
border-top-color: var(--icon-color-active);
|
border-color: var(--icon-color-active);
|
||||||
border-radius: 50%;
|
border-radius: 50%;
|
||||||
animation: spin 1s linear infinite;
|
animation: pulse 3s ease-in-out infinite;
|
||||||
|
}
|
||||||
|
@keyframes pulse {
|
||||||
|
0% {
|
||||||
|
transform: scale(1);
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
50% {
|
||||||
|
transform: scale(1.2);
|
||||||
|
opacity: 0.2;
|
||||||
|
}
|
||||||
|
100% {
|
||||||
|
transform: scale(1);
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@keyframes spin {
|
@keyframes spin {
|
||||||
|
|||||||
Reference in New Issue
Block a user