From 719ced580002c001554e45bfbae0f282ea0f1b9d Mon Sep 17 00:00:00 2001 From: Ilango Rajagopal Date: Tue, 26 Aug 2025 13:42:27 +0530 Subject: [PATCH 1/3] Add TTS feature with autoplay --- src/components/Bot.tsx | 448 +++++++++++++++++++++++- src/components/bubbles/BotBubble.tsx | 55 ++- src/components/buttons/TTSButton.tsx | 51 +++ src/components/icons/SquareStopIcon.tsx | 19 + src/components/icons/VolumeIcon.tsx | 21 ++ src/components/icons/XIcon.tsx | 2 +- src/components/icons/index.ts | 2 + src/queries/sendMessageQuery.ts | 30 ++ src/utils/index.ts | 2 + 9 files changed, 612 insertions(+), 18 deletions(-) create mode 100644 src/components/buttons/TTSButton.tsx create mode 100644 src/components/icons/SquareStopIcon.tsx create mode 100644 src/components/icons/VolumeIcon.tsx diff --git a/src/components/Bot.tsx b/src/components/Bot.tsx index ca42113d0..f15284d2e 100644 --- a/src/components/Bot.tsx +++ b/src/components/Bot.tsx @@ -1,4 +1,4 @@ -import { createSignal, createEffect, For, onMount, Show, mergeProps, on, createMemo } from 'solid-js'; +import { createSignal, createEffect, For, onMount, Show, mergeProps, on, createMemo, onCleanup } from 'solid-js'; import { v4 as uuidv4 } from 'uuid'; import { sendMessageQuery, @@ -8,6 +8,7 @@ import { getChatbotConfig, FeedbackRatingType, createAttachmentWithFormData, + generateTTSQuery, } from '@/queries/sendMessageQuery'; import { TextInput } from './inputs/textInput'; import { GuestBubble } from './bubbles/GuestBubble'; @@ -516,6 +517,21 @@ export const Bot = (botProps: BotProps & { class?: string }) => { const [uploadedFiles, setUploadedFiles] = createSignal<{ file: File; type: string }[]>([]); const [fullFileUploadAllowedTypes, setFullFileUploadAllowedTypes] = createSignal('*'); + // TTS state + const [isTTSLoading, setIsTTSLoading] = createSignal>({}); + const [isTTSPlaying, setIsTTSPlaying] = createSignal>({}); + const [ttsAudio, setTtsAudio] = createSignal>({}); + const [isTTSEnabled, setIsTTSEnabled] = createSignal(false); + const [ttsStreamingState, setTtsStreamingState] = createSignal({ + mediaSource: null as MediaSource | null, + sourceBuffer: null as SourceBuffer | null, + audio: null as HTMLAudioElement | null, + chunkQueue: [] as Uint8Array[], + isBuffering: false, + audioFormat: null as string | null, + abortController: null as AbortController | null, + }); + createMemo(() => { const customerId = (props.chatflowConfig?.vars as any)?.customerId; setChatId(customerId ? `${customerId.toString()}+${uuidv4()}` : uuidv4()); @@ -861,6 +877,15 @@ export const Bot = (botProps: BotProps & { class?: string }) => { setLocalStorageChatflow(chatflowid, chatId); closeResponse(); break; + case 'tts_start': + handleTTSStart(payload.data); + break; + case 'tts_data': + handleTTSDataChunk(payload.data.audioChunk); + break; + case 'tts_end': + handleTTSEnd(); + break; } }, async onclose() { @@ -1401,6 +1426,9 @@ export const Bot = (botProps: BotProps & { class?: string }) => { setFullFileUploadAllowedTypes(chatbotConfig.fullFileUpload?.allowedUploadFileTypes); } } + if (chatbotConfig.isTTSEnabled) { + setIsTTSEnabled(chatbotConfig.isTTSEnabled); + } } // eslint-disable-next-line solid/reactivity @@ -1417,6 +1445,42 @@ export const Bot = (botProps: BotProps & { class?: string }) => { }; }); + // TTS sourceBuffer updateend listener management + let currentSourceBuffer: SourceBuffer | null = null; + let updateEndHandler: (() => void) | null = null; + + createEffect(() => { + const streamingState = ttsStreamingState(); + + // Remove previous listener if sourceBuffer changed + if (currentSourceBuffer && currentSourceBuffer !== streamingState.sourceBuffer && updateEndHandler) { + currentSourceBuffer.removeEventListener('updateend', updateEndHandler); + currentSourceBuffer = null; + updateEndHandler = null; + } + + // Add listener to new sourceBuffer + if (streamingState.sourceBuffer && streamingState.sourceBuffer !== currentSourceBuffer) { + const sourceBuffer = streamingState.sourceBuffer; + currentSourceBuffer = sourceBuffer; + + updateEndHandler = () => { + setTtsStreamingState((prevState) => ({ + ...prevState, + isBuffering: false, + })); + setTimeout(() => processChunkQueue(), 0); + }; + + sourceBuffer.addEventListener('updateend', updateEndHandler); + } + }); + + // TTS cleanup on component unmount + onCleanup(() => { + cleanupTTSStreaming(); + }); + createEffect(() => { if (followUpPromptsStatus() && messages().length > 0) { const lastMessage = messages()[messages().length - 1]; @@ -1680,6 +1744,383 @@ export const Bot = (botProps: BotProps & { class?: string }) => { return false; }; + // TTS Functions + const processChunkQueue = () => { + const currentState = ttsStreamingState(); + if (!currentState.sourceBuffer || currentState.sourceBuffer.updating || currentState.chunkQueue.length === 0) { + return; + } + + const chunk = currentState.chunkQueue[0]; + if (!chunk) return; + + try { + currentState.sourceBuffer.appendBuffer(chunk); + setTtsStreamingState((prevState) => ({ + ...prevState, + chunkQueue: prevState.chunkQueue.slice(1), + isBuffering: true, + })); + } catch (error) { + console.error('Error appending chunk to buffer:', error); + } + }; + + const handleTTSStart = (data: { chatMessageId: string; format: string }) => { + setIsTTSLoading((prevState) => ({ + ...prevState, + [data.chatMessageId]: true, + })); + + setMessages((prevMessages) => { + const allMessages = [...cloneDeep(prevMessages)]; + const lastMessage = allMessages[allMessages.length - 1]; + if (lastMessage.type === 'userMessage') return allMessages; + if (lastMessage.id) return allMessages; + allMessages[allMessages.length - 1].id = data.chatMessageId; + return allMessages; + }); + + setTtsStreamingState({ + mediaSource: null, + sourceBuffer: null, + audio: null, + chunkQueue: [], + isBuffering: false, + audioFormat: data.format, + abortController: null, + }); + + setTimeout(() => initializeTTSStreaming(data), 0); + }; + + const handleTTSDataChunk = (base64Data: string) => { + try { + const audioBuffer = Uint8Array.from(atob(base64Data), (c) => c.charCodeAt(0)); + + setTtsStreamingState((prevState) => { + const newState = { + ...prevState, + chunkQueue: [...prevState.chunkQueue, audioBuffer], + }; + + // Schedule processing after state update + if (prevState.sourceBuffer && !prevState.sourceBuffer.updating) { + setTimeout(() => processChunkQueue(), 0); + } + + return newState; + }); + } catch (error) { + console.error('Error handling TTS data chunk:', error); + } + }; + + const handleTTSEnd = () => { + const currentState = ttsStreamingState(); + if (currentState.mediaSource && currentState.mediaSource.readyState === 'open') { + try { + // Process any remaining chunks first + if (currentState.sourceBuffer && currentState.chunkQueue.length > 0) { + let processedCount = 0; + const totalChunks = currentState.chunkQueue.length; + + const processRemainingChunks = () => { + const state = ttsStreamingState(); + if (processedCount < totalChunks && state.sourceBuffer && !state.sourceBuffer.updating) { + const chunk = state.chunkQueue[0]; + if (chunk) { + try { + state.sourceBuffer.appendBuffer(chunk); + setTtsStreamingState((prevState) => ({ + ...prevState, + chunkQueue: prevState.chunkQueue.slice(1), + })); + processedCount++; + } catch (error) { + console.error('Error appending remaining chunk:', error); + } + } + } else if (processedCount >= totalChunks) { + // All chunks processed, end the stream + setTimeout(() => { + const finalState = ttsStreamingState(); + if (finalState.mediaSource && finalState.mediaSource.readyState === 'open') { + finalState.mediaSource.endOfStream(); + } + }, 100); + } + }; + + // Set up listener for processing remaining chunks + if (currentState.sourceBuffer) { + const handleFinalUpdateEnd = () => { + processRemainingChunks(); + }; + currentState.sourceBuffer.addEventListener('updateend', handleFinalUpdateEnd); + processRemainingChunks(); + } + } else if (currentState.sourceBuffer && !currentState.sourceBuffer.updating) { + currentState.mediaSource.endOfStream(); + } + } catch (error) { + console.error('Error ending TTS stream:', error); + } + } + }; + + const initializeTTSStreaming = (data: { chatMessageId: string; format: string }) => { + try { + const mediaSource = new MediaSource(); + const audio = new Audio(); + audio.src = URL.createObjectURL(mediaSource); + + mediaSource.addEventListener('sourceopen', () => { + try { + const mimeType = data.format === 'mp3' ? 'audio/mpeg' : 'audio/mpeg'; + const sourceBuffer = mediaSource.addSourceBuffer(mimeType); + + setTtsStreamingState((prevState) => ({ + ...prevState, + mediaSource, + sourceBuffer, + audio, + })); + + // Start audio playback + audio.play().catch((playError) => { + console.error('Error starting audio playback:', playError); + }); + } catch (error) { + console.error('Error setting up source buffer:', error); + console.error('MediaSource readyState:', mediaSource.readyState); + } + }); + + audio.addEventListener('playing', () => { + setIsTTSLoading((prevState) => { + const newState = { ...prevState }; + newState[data.chatMessageId] = false; + return newState; + }); + setIsTTSPlaying((prevState) => ({ + ...prevState, + [data.chatMessageId]: true, + })); + }); + + audio.addEventListener('ended', () => { + setIsTTSPlaying((prevState) => { + const newState = { ...prevState }; + delete newState[data.chatMessageId]; + return newState; + }); + cleanupTTSStreaming(); + }); + } catch (error) { + console.error('Error initializing TTS streaming:', error); + } + }; + + const cleanupTTSStreaming = () => { + const currentState = ttsStreamingState(); + + if (currentState.abortController) { + currentState.abortController.abort(); + } + + if (currentState.audio) { + currentState.audio.pause(); + currentState.audio.removeAttribute('src'); + if (currentState.audio.src) { + URL.revokeObjectURL(currentState.audio.src); + } + // Remove all event listeners + currentState.audio.removeEventListener('playing'); + currentState.audio.removeEventListener('ended'); + } + + if (currentState.sourceBuffer) { + // Remove update listeners + if (currentState.sourceBuffer.onupdateend) { + currentState.sourceBuffer.removeEventListener('updateend', currentState.sourceBuffer.onupdateend); + currentState.sourceBuffer.onupdateend = null; + } + } + + if (currentState.mediaSource) { + if (currentState.mediaSource.readyState === 'open') { + try { + currentState.mediaSource.endOfStream(); + } catch (e) { + // Ignore errors during cleanup + } + } + } + + setTtsStreamingState({ + mediaSource: null, + sourceBuffer: null, + audio: null, + chunkQueue: [], + isBuffering: false, + audioFormat: null, + abortController: null, + }); + }; + + const handleTTSStop = (messageId: string) => { + const audioElement = ttsAudio()[messageId]; + if (audioElement) { + audioElement.pause(); + audioElement.currentTime = 0; + setTtsAudio((prev) => { + const newState = { ...prev }; + delete newState[messageId]; + return newState; + }); + } + + const streamingState = ttsStreamingState(); + if (streamingState.audio) { + streamingState.audio.pause(); + cleanupTTSStreaming(); + } + + setIsTTSPlaying((prev) => { + const newState = { ...prev }; + delete newState[messageId]; + return newState; + }); + + setIsTTSLoading((prev) => { + const newState = { ...prev }; + delete newState[messageId]; + return newState; + }); + }; + + const stopAllTTS = () => { + const audioElements = ttsAudio(); + Object.keys(audioElements).forEach((messageId) => { + if (audioElements[messageId]) { + audioElements[messageId].pause(); + audioElements[messageId].currentTime = 0; + } + }); + setTtsAudio({}); + + const streamingState = ttsStreamingState(); + if (streamingState.abortController) { + streamingState.abortController.abort(); + } + + if (streamingState.audio) { + streamingState.audio.pause(); + cleanupTTSStreaming(); + } + + setIsTTSPlaying({}); + setIsTTSLoading({}); + }; + + const handleTTSClick = async (messageId: string, messageText: string) => { + const loadingState = isTTSLoading(); + if (loadingState[messageId]) return; + + const playingState = isTTSPlaying(); + const audioElement = ttsAudio()[messageId]; + if (playingState[messageId] || audioElement) { + handleTTSStop(messageId); + return; + } + + stopAllTTS(); + handleTTSStart({ chatMessageId: messageId, format: 'mp3' }); + + try { + const abortController = new AbortController(); + setTtsStreamingState((prev) => ({ ...prev, abortController })); + + const response = await generateTTSQuery({ + apiHost: props.apiHost, + body: { + chatId: chatId(), + chatflowId: props.chatflowid, + chatMessageId: messageId, + text: messageText, + }, + onRequest: props.onRequest, + signal: abortController.signal, + }); + + if (!response.ok) { + throw new Error(`TTS request failed: ${response.status}`); + } + + const reader = response.body?.getReader(); + const decoder = new TextDecoder(); + + if (reader) { + let buffer = ''; + let done = false; + while (!done) { + if (abortController.signal.aborted) { + break; + } + + const result = await reader.read(); + done = result.done; + if (done) break; + + const value = result.value; + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split('\n'); + buffer = lines.pop() || ''; + + for (const line of lines) { + if (line.trim() && line.startsWith('data: ')) { + try { + const eventData = line.slice(6); + if (eventData === '[DONE]') break; + + const event = JSON.parse(eventData); + switch (event.event) { + case 'tts_start': + break; + case 'tts_data': + if (!abortController.signal.aborted) { + handleTTSDataChunk(event.data.audioChunk); + } + break; + case 'tts_end': + if (!abortController.signal.aborted) { + handleTTSEnd(); + } + break; + } + } catch (parseError) { + console.error('Error parsing SSE event:', parseError); + } + } + } + } + } + } catch (error: any) { + if (error.name === 'AbortError') { + console.error('TTS request was aborted'); + } else { + console.error('Error with TTS:', error); + } + } finally { + setIsTTSLoading((prev) => { + const newState = { ...prev }; + delete newState[messageId]; + return newState; + }); + } + }; + createEffect( // listen for changes in previews on(previews, (uploads) => { @@ -1856,6 +2297,11 @@ export const Bot = (botProps: BotProps & { class?: string }) => { }} dateTimeToggle={props.dateTimeToggle} renderHTML={props.renderHTML} + isTTSEnabled={isTTSEnabled()} + isTTSLoading={isTTSLoading()} + isTTSPlaying={isTTSPlaying()} + handleTTSClick={handleTTSClick} + handleTTSStop={handleTTSStop} /> )} {message.type === 'leadCaptureMessage' && leadsConfig()?.status && !getLocalStorageChatflow(props.chatflowid)?.lead && ( diff --git a/src/components/bubbles/BotBubble.tsx b/src/components/bubbles/BotBubble.tsx index e747fa6e9..8f5fa308b 100644 --- a/src/components/bubbles/BotBubble.tsx +++ b/src/components/bubbles/BotBubble.tsx @@ -4,6 +4,7 @@ import { Marked } from '@ts-stack/markdown'; import { FeedbackRatingType, sendFeedbackQuery, sendFileDownloadQuery, updateFeedbackQuery } from '@/queries/sendMessageQuery'; import { FileUpload, IAction, MessageType } from '../Bot'; import { CopyToClipboardButton, ThumbsDownButton, ThumbsUpButton } from '../buttons/FeedbackButtons'; +import { TTSButton } from '../buttons/TTSButton'; import FeedbackContentDialog from '../FeedbackContentDialog'; import { AgentReasoningBubble } from './AgentReasoningBubble'; import { TickIcon, XIcon } from '../icons'; @@ -32,6 +33,12 @@ type Props = { renderHTML?: boolean; handleActionClick: (elem: any, action: IAction | undefined | null) => void; handleSourceDocumentsClick: (src: any) => void; + // TTS props + isTTSEnabled?: boolean; + isTTSLoading?: Record; + isTTSPlaying?: Record; + handleTTSClick?: (messageId: string, messageText: string) => void; + handleTTSStop?: (messageId: string) => void; }; const defaultBackgroundColor = '#f7f8ff'; @@ -481,7 +488,7 @@ export const BotBubble = (props: Props) => { {action.label} ) : ( - + )} ); @@ -521,9 +528,25 @@ export const BotBubble = (props: Props) => { )}
- {props.chatFeedbackStatus && props.message.messageId && ( - <> -
+
+ + { + const messageId = props.message.id || ''; + const messageText = props.message.message || ''; + if (props.isTTSPlaying?.[messageId]) { + props.handleTTSStop?.(messageId); + } else { + props.handleTTSClick?.(messageId, messageText); + } + }} + /> + + {props.chatFeedbackStatus && props.message.messageId && ( + <> copyMessageToClipboard()} />
@@ -546,18 +569,18 @@ export const BotBubble = (props: Props) => { {formatDateTime(props.message.dateTime, props?.dateTimeToggle?.date, props?.dateTimeToggle?.time)}
-
- - setShowFeedbackContentModal(false)} - onSubmit={submitFeedbackContent} - backgroundColor={props.backgroundColor} - textColor={props.textColor} - /> - - - )} + + )} +
+ + setShowFeedbackContentModal(false)} + onSubmit={submitFeedbackContent} + backgroundColor={props.backgroundColor} + textColor={props.textColor} + /> +
); diff --git a/src/components/buttons/TTSButton.tsx b/src/components/buttons/TTSButton.tsx new file mode 100644 index 000000000..55985dd57 --- /dev/null +++ b/src/components/buttons/TTSButton.tsx @@ -0,0 +1,51 @@ +import { Show } from 'solid-js'; +import { VolumeIcon, SquareStopIcon } from '../icons'; + +type Props = { + isLoading?: boolean; + isPlaying?: boolean; + feedbackColor?: string; + onClick: () => void; + class?: string; +}; + +const defaultButtonColor = '#3B81F6'; + +export const TTSButton = (props: Props) => { + const handleClick = (event: MouseEvent) => { + event.preventDefault(); + props.onClick(); + }; + + return ( + + ); +}; diff --git a/src/components/icons/SquareStopIcon.tsx b/src/components/icons/SquareStopIcon.tsx new file mode 100644 index 000000000..6adbe6152 --- /dev/null +++ b/src/components/icons/SquareStopIcon.tsx @@ -0,0 +1,19 @@ +import { JSX } from 'solid-js/jsx-runtime'; +const defaultButtonColor = '#3B81F6'; +export const SquareStopIcon = (props: JSX.SvgSVGAttributes) => ( + + + +); diff --git a/src/components/icons/VolumeIcon.tsx b/src/components/icons/VolumeIcon.tsx new file mode 100644 index 000000000..b5e0fcaf9 --- /dev/null +++ b/src/components/icons/VolumeIcon.tsx @@ -0,0 +1,21 @@ +import { JSX } from 'solid-js/jsx-runtime'; +const defaultButtonColor = '#3B81F6'; +export const VolumeIcon = (props: JSX.SvgSVGAttributes) => ( + + + + + +); diff --git a/src/components/icons/XIcon.tsx b/src/components/icons/XIcon.tsx index 98af59746..6587e92ab 100644 --- a/src/components/icons/XIcon.tsx +++ b/src/components/icons/XIcon.tsx @@ -7,7 +7,7 @@ export const XIcon = (props: JSX.SvgSVGAttributes & { isCurrentCo height="24" viewBox="0 0 24 24" fill="none" - stroke={props.isCurrentColor ? 'currentColor' : props.color ?? defaultButtonColor} + stroke={props.isCurrentColor ? 'currentColor' : (props.color ?? defaultButtonColor)} stroke-width="2" stroke-linecap="round" stroke-linejoin="round" diff --git a/src/components/icons/index.ts b/src/components/icons/index.ts index 5f3e3496e..543629d55 100644 --- a/src/components/icons/index.ts +++ b/src/components/icons/index.ts @@ -11,3 +11,5 @@ export * from './XIcon'; export * from './TickIcon'; export * from './AttachmentIcon'; export * from './SparklesIcon'; +export * from './VolumeIcon'; +export * from './SquareStopIcon'; diff --git a/src/queries/sendMessageQuery.ts b/src/queries/sendMessageQuery.ts index cbea3e745..60523bc12 100644 --- a/src/queries/sendMessageQuery.ts +++ b/src/queries/sendMessageQuery.ts @@ -61,6 +61,16 @@ export type LeadCaptureRequest = BaseRequest & { body: Partial; }; +export type GenerateTTSRequest = BaseRequest & { + body: { + chatId: string; + chatflowId: string; + chatMessageId: string; + text: string; + }; + signal?: AbortSignal; +}; + export const sendFeedbackQuery = ({ chatflowid, apiHost = 'http://localhost:3000', body, onRequest }: CreateFeedbackRequest) => sendRequest({ method: 'POST', @@ -137,3 +147,23 @@ export const addLeadQuery = ({ apiHost = 'http://localhost:3000', body, onReques body, onRequest: onRequest, }); + +export const generateTTSQuery = async ({ apiHost = 'http://localhost:3000', body, onRequest, signal }: GenerateTTSRequest): Promise => { + const headers = { + 'Content-Type': 'application/json', + }; + + const requestInfo: RequestInit = { + method: 'POST', + mode: 'cors', + headers, + body: JSON.stringify(body), + signal, + }; + + if (onRequest) { + await onRequest(requestInfo); + } + + return fetch(`${apiHost}/api/v1/text-to-speech/generate`, requestInfo); +}; diff --git a/src/utils/index.ts b/src/utils/index.ts index 3285ec0b8..8c291db0e 100644 --- a/src/utils/index.ts +++ b/src/utils/index.ts @@ -16,6 +16,7 @@ export const sendRequest = async ( headers?: Record; formData?: FormData; onRequest?: (request: RequestInit) => Promise; + signal?: AbortSignal; } | string, ): Promise<{ data?: ResponseData; error?: Error }> => { @@ -36,6 +37,7 @@ export const sendRequest = async ( mode: 'cors', headers, body, + signal: typeof params !== 'string' ? params.signal : undefined, }; if (typeof params !== 'string' && params.onRequest) { From 204c5a6dd35d440b61949561b20462a7a3367a64 Mon Sep 17 00:00:00 2001 From: Ilango Rajagopal Date: Tue, 26 Aug 2025 13:44:54 +0530 Subject: [PATCH 2/3] Fix lint errors and formatting --- src/components/Bot.tsx | 4 ++-- src/components/buttons/TTSButton.tsx | 4 +++- src/components/icons/XIcon.tsx | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/components/Bot.tsx b/src/components/Bot.tsx index f15284d2e..03730d543 100644 --- a/src/components/Bot.tsx +++ b/src/components/Bot.tsx @@ -1936,8 +1936,8 @@ export const Bot = (botProps: BotProps & { class?: string }) => { URL.revokeObjectURL(currentState.audio.src); } // Remove all event listeners - currentState.audio.removeEventListener('playing'); - currentState.audio.removeEventListener('ended'); + currentState.audio.removeEventListener('playing', () => console.log('Playing')); + currentState.audio.removeEventListener('ended', () => console.log('Ended')); } if (currentState.sourceBuffer) { diff --git a/src/components/buttons/TTSButton.tsx b/src/components/buttons/TTSButton.tsx index 55985dd57..4b1135485 100644 --- a/src/components/buttons/TTSButton.tsx +++ b/src/components/buttons/TTSButton.tsx @@ -19,7 +19,9 @@ export const TTSButton = (props: Props) => { return (