enh/refac: read aloud audio queue

This commit is contained in:
Timothy Jaeryang Baek 2025-11-06 01:48:10 -05:00
parent e2b9942648
commit 1cc3493dc8
4 changed files with 130 additions and 65 deletions

View file

@ -27,6 +27,7 @@
banners, banners,
user, user,
socket, socket,
audioQueue,
showControls, showControls,
showCallOverlay, showCallOverlay,
currentChatPage, currentChatPage,
@ -43,6 +44,7 @@
pinnedChats, pinnedChats,
showEmbeds showEmbeds
} from '$lib/stores'; } from '$lib/stores';
import { import {
convertMessagesToHistory, convertMessagesToHistory,
copyToClipboard, copyToClipboard,
@ -53,6 +55,8 @@
removeAllDetails, removeAllDetails,
getCodeBlockContents getCodeBlockContents
} from '$lib/utils'; } from '$lib/utils';
import { AudioQueue } from '$lib/utils/audio';
import { import {
createNewChat, createNewChat,
getAllTags, getAllTags,
@ -529,17 +533,28 @@
let showControlsSubscribe = null; let showControlsSubscribe = null;
let selectedFolderSubscribe = null; let selectedFolderSubscribe = null;
const stopAudio = () => {
try {
speechSynthesis.cancel();
$audioQueue.stop();
} catch {}
};
onMount(async () => { onMount(async () => {
loading = true; loading = true;
console.log('mounted'); console.log('mounted');
window.addEventListener('message', onMessageHandler); window.addEventListener('message', onMessageHandler);
$socket?.on('events', chatEventHandler); $socket?.on('events', chatEventHandler);
audioQueue.set(new AudioQueue(document.getElementById('audioElement')));
pageSubscribe = page.subscribe(async (p) => { pageSubscribe = page.subscribe(async (p) => {
if (p.url.pathname === '/') { if (p.url.pathname === '/') {
await tick(); await tick();
initNewChat(); initNewChat();
} }
stopAudio();
}); });
const storageChatInput = sessionStorage.getItem( const storageChatInput = sessionStorage.getItem(
@ -621,6 +636,7 @@
chatIdUnsubscriber?.(); chatIdUnsubscriber?.();
window.removeEventListener('message', onMessageHandler); window.removeEventListener('message', onMessageHandler);
$socket?.off('events', chatEventHandler); $socket?.off('events', chatEventHandler);
$audioQueue?.destroy();
} catch (e) { } catch (e) {
console.error(e); console.error(e);
} }
@ -2347,7 +2363,7 @@
</title> </title>
</svelte:head> </svelte:head>
<audio id="audioElement" src="" style="display: none;" /> <audio id="audioElement" src="" style="display: none;"></audio>
<EventConfirmDialog <EventConfirmDialog
bind:show={showEventConfirmation} bind:show={showEventConfirmation}

View file

@ -15,7 +15,15 @@
import { getChatById } from '$lib/apis/chats'; import { getChatById } from '$lib/apis/chats';
import { generateTags } from '$lib/apis'; import { generateTags } from '$lib/apis';
import { config, models, settings, temporaryChatEnabled, TTSWorker, user } from '$lib/stores'; import {
audioQueue,
config,
models,
settings,
temporaryChatEnabled,
TTSWorker,
user
} from '$lib/stores';
import { synthesizeOpenAISpeech } from '$lib/apis/audio'; import { synthesizeOpenAISpeech } from '$lib/apis/audio';
import { imageGenerations } from '$lib/apis/images'; import { imageGenerations } from '$lib/apis/images';
import { import {
@ -156,7 +164,6 @@
let messageIndexEdit = false; let messageIndexEdit = false;
let audioParts: Record<number, HTMLAudioElement | null> = {};
let speaking = false; let speaking = false;
let speakingIdx: number | undefined; let speakingIdx: number | undefined;
@ -178,51 +185,25 @@
} }
}; };
const playAudio = (idx: number) => { const stopAudio = () => {
return new Promise<void>((res) => { try {
speakingIdx = idx; speechSynthesis.cancel();
const audio = audioParts[idx]; $audioQueue.stop();
} catch {}
if (!audio) {
return res();
}
audio.play();
audio.onended = async () => {
await new Promise((r) => setTimeout(r, 300));
if (Object.keys(audioParts).length - 1 === idx) {
speaking = false;
}
res();
};
});
};
const toggleSpeakMessage = async () => {
if (speaking) { if (speaking) {
try {
speechSynthesis.cancel();
if (speakingIdx !== undefined && audioParts[speakingIdx]) {
audioParts[speakingIdx]!.pause();
audioParts[speakingIdx]!.currentTime = 0;
}
} catch {}
speaking = false; speaking = false;
speakingIdx = undefined; speakingIdx = undefined;
return;
} }
};
const speak = async () => {
if (!(message?.content ?? '').trim().length) { if (!(message?.content ?? '').trim().length) {
toast.info($i18n.t('No content to speak')); toast.info($i18n.t('No content to speak'));
return; return;
} }
speaking = true; speaking = true;
const content = removeAllDetails(message.content); const content = removeAllDetails(message.content);
if ($config.audio.tts.engine === '') { if ($config.audio.tts.engine === '') {
@ -241,12 +222,12 @@
console.log(voice); console.log(voice);
const speak = new SpeechSynthesisUtterance(content); const speech = new SpeechSynthesisUtterance(content);
speak.rate = $settings.audio?.tts?.playbackRate ?? 1; speech.rate = $settings.audio?.tts?.playbackRate ?? 1;
console.log(speak); console.log(speech);
speak.onend = () => { speech.onend = () => {
speaking = false; speaking = false;
if ($settings.conversationMode) { if ($settings.conversationMode) {
document.getElementById('voice-input-button')?.click(); document.getElementById('voice-input-button')?.click();
@ -254,15 +235,21 @@
}; };
if (voice) { if (voice) {
speak.voice = voice; speech.voice = voice;
} }
speechSynthesis.speak(speak); speechSynthesis.speak(speech);
} }
}, 100); }, 100);
} else { } else {
loadingSpeech = true; $audioQueue.setId(`${message.id}`);
$audioQueue.setPlaybackRate($settings.audio?.tts?.playbackRate ?? 1);
$audioQueue.onStopped = () => {
speaking = false;
speakingIdx = undefined;
};
loadingSpeech = true;
const messageContentParts: string[] = getMessageContentParts( const messageContentParts: string[] = getMessageContentParts(
content, content,
$config?.audio?.tts?.split_on ?? 'punctuation' $config?.audio?.tts?.split_on ?? 'punctuation'
@ -278,17 +265,6 @@
} }
console.debug('Prepared message content for TTS', messageContentParts); console.debug('Prepared message content for TTS', messageContentParts);
audioParts = messageContentParts.reduce(
(acc, _sentence, idx) => {
acc[idx] = null;
return acc;
},
{} as typeof audioParts
);
let lastPlayedAudioPromise = Promise.resolve(); // Initialize a promise that resolves immediately
if ($settings.audio?.tts?.engine === 'browser-kokoro') { if ($settings.audio?.tts?.engine === 'browser-kokoro') {
if (!$TTSWorker) { if (!$TTSWorker) {
await TTSWorker.set( await TTSWorker.set(
@ -315,12 +291,9 @@
}); });
if (blob) { if (blob) {
const audio = new Audio(blob); const url = URL.createObjectURL(blob);
audio.playbackRate = $settings.audio?.tts?.playbackRate ?? 1; $audioQueue.enqueue(url);
audioParts[idx] = audio;
loadingSpeech = false; loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
} }
} }
} else { } else {
@ -341,13 +314,10 @@
if (res) { if (res) {
const blob = await res.blob(); const blob = await res.blob();
const blobUrl = URL.createObjectURL(blob); const url = URL.createObjectURL(blob);
const audio = new Audio(blobUrl);
audio.playbackRate = $settings.audio?.tts?.playbackRate ?? 1;
audioParts[idx] = audio; $audioQueue.enqueue(url);
loadingSpeech = false; loadingSpeech = false;
lastPlayedAudioPromise = lastPlayedAudioPromise.then(() => playAudio(idx));
} }
} }
} }
@ -992,7 +962,11 @@
: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition" : 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition"
on:click={() => { on:click={() => {
if (!loadingSpeech) { if (!loadingSpeech) {
toggleSpeakMessage(); if (speaking) {
stopAudio();
} else {
speak();
}
} }
}} }}
> >

View file

@ -68,6 +68,8 @@ export const banners: Writable<Banner[]> = writable([]);
export const settings: Writable<Settings> = writable({}); export const settings: Writable<Settings> = writable({});
export const audioQueue = writable(null);
export const showSidebar = writable(false); export const showSidebar = writable(false);
export const showSearch = writable(false); export const showSearch = writable(false);
export const showSettings = writable(false); export const showSettings = writable(false);

73
src/lib/utils/audio.ts Normal file
View file

@ -0,0 +1,73 @@
/**
 * FIFO playback queue for TTS audio chunks, driven by one shared <audio>
 * element. Enqueued URLs (typically blob: object URLs created by the caller)
 * play back-to-back: when a clip fires 'ended', the next one starts.
 *
 * Fixes over the naive version:
 * - blob: object URLs are revoked once finished / discarded (no leak);
 * - each stop condition fires `onStopped` exactly once (previously the
 *   empty-queue and id-change paths fired it twice);
 * - `audio.play()` rejections (e.g. autoplay policy) are swallowed instead
 *   of surfacing as unhandled promise rejections;
 * - methods are safe to call after `destroy()`.
 */
export class AudioQueue {
	audio: HTMLAudioElement | null;
	queue: string[];
	current: string | null;
	id: string | null;
	// Optional callback: invoked with { event: 'stop' | 'empty-queue' | 'id-change', id }.
	onStopped: ((info: { event: string; id: string | null }) => void) | null;
	private _onEnded: () => void;

	constructor(audioElement: HTMLAudioElement) {
		this.audio = audioElement;
		this.queue = [];
		this.current = null;
		this.id = null;
		this._onEnded = () => this.next();
		this.audio.addEventListener('ended', this._onEnded);
		this.onStopped = null; // optional callback
	}

	/** Release a blob: object URL; other URL schemes are left untouched. */
	private _revoke(url: string | null) {
		if (url && url.startsWith('blob:')) {
			try {
				URL.revokeObjectURL(url);
			} catch {
				// revoking twice, or outside a DOM context, is harmless
			}
		}
	}

	/** Halt playback and clear all queue state WITHOUT firing onStopped. */
	private _reset() {
		if (this.audio) {
			this.audio.pause();
			this.audio.currentTime = 0;
			// Detach the source so the element does not keep the blob alive.
			this.audio.src = '';
		}
		this._revoke(this.current);
		for (const url of this.queue) this._revoke(url);
		this.queue = [];
		this.current = null;
	}

	setId(newId: string) {
		console.log('Setting audio queue ID to:', newId);
		if (this.id !== newId) {
			// A new id means a different message is being spoken: drop any
			// pending audio for the previous one, then report a single
			// 'id-change' (not 'stop' + 'id-change').
			this._reset();
			this.id = newId;
			if (this.onStopped) this.onStopped({ event: 'id-change', id: newId });
		}
	}

	setPlaybackRate(rate: number) {
		console.log('Setting audio playback rate to:', rate);
		if (this.audio) this.audio.playbackRate = rate;
	}

	enqueue(url: string) {
		console.log('Enqueuing audio URL:', url);
		this.queue.push(url);
		// Auto-play if nothing is currently playing or loaded
		if (this.audio && this.audio.paused && !this.current) {
			this.next();
		}
	}

	play() {
		if (!this.audio) return;
		if (!this.current && this.queue.length > 0) {
			this.next();
		} else {
			// play() returns a Promise in browsers; ignore autoplay rejections.
			this.audio.play()?.catch(() => {});
		}
	}

	next() {
		// The clip that just finished (if any) can be released now.
		this._revoke(this.current);
		this.current = this.queue.shift() ?? null;
		if (this.current && this.audio) {
			this.audio.src = this.current;
			this.audio.play()?.catch(() => {});
			console.log('Playing audio URL:', this.current);
		} else {
			// Queue exhausted: tidy up, then report exactly once.
			this._reset();
			if (this.onStopped) this.onStopped({ event: 'empty-queue', id: this.id });
		}
	}

	stop() {
		this._reset();
		if (this.onStopped) this.onStopped({ event: 'stop', id: this.id });
	}

	destroy() {
		if (this.audio) this.audio.removeEventListener('ended', this._onEnded);
		// Tear down silently: firing onStopped during component unmount
		// would poke state that is already being destroyed.
		this._reset();
		this.onStopped = null;
		this.audio = null;
	}
}