Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -127,5 +127,8 @@
}
]
]
},
"dependencies": {
"@elevenlabs/elevenlabs-js": "^2.8.0"
}
}
26 changes: 26 additions & 0 deletions packages/components/credentials/ElevenLabsApi.credential.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { INodeParams, INodeCredential } from '../src/Interface'

/**
 * Credential definition for the Eleven Labs text-to-speech API.
 * Exposes a single masked input holding the user's Eleven Labs API key.
 */
class ElevenLabsApi implements INodeCredential {
    label: string
    name: string
    version: number
    description: string
    inputs: INodeParams[]

    constructor() {
        this.label = 'Eleven Labs API'
        this.name = 'elevenLabsApi'
        this.version = 1.0
        // User-facing hint rendered in the credential dialog.
        // ("an Eleven Labs" — fixed article from the original "a Eleven Labs".)
        this.description =
            'Sign up for an Eleven Labs account and <a target="_blank" href="https://elevenlabs.io/app/settings/api-keys">create an API Key</a>.'
        this.inputs = [
            {
                label: 'Eleven Labs API Key',
                name: 'elevenLabsApiKey',
                // 'password' masks the value in the UI and stores it as a secret
                type: 'password'
            }
        ]
    }
}

module.exports = { credClass: ElevenLabsApi }
1 change: 1 addition & 0 deletions packages/components/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"@dqbd/tiktoken": "^1.0.21",
"@e2b/code-interpreter": "^1.5.1",
"@elastic/elasticsearch": "^8.9.0",
"@elevenlabs/elevenlabs-js": "^2.8.0",
"@flowiseai/nodevm": "^3.9.25",
"@getzep/zep-cloud": "~1.0.7",
"@getzep/zep-js": "^0.9.0",
Expand Down
3 changes: 3 additions & 0 deletions packages/components/src/Interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,9 @@ export interface IServerSideEventStreamer {
streamAbortEvent(chatId: string): void
streamEndEvent(chatId: string): void
streamUsageMetadataEvent(chatId: string, data: any): void
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void
streamTTSEndEvent(chatId: string, chatMessageId: string): void
}

export enum FollowUpPromptProvider {
Expand Down
1 change: 1 addition & 0 deletions packages/components/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dotenv.config({ path: envPath, override: true })
export * from './Interface'
export * from './utils'
export * from './speechToText'
export * from './textToSpeech'
export * from './storageUtils'
export * from './handler'
export * from '../evaluation/EvaluationRunner'
Expand Down
189 changes: 189 additions & 0 deletions packages/components/src/textToSpeech.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import { ICommonObject } from './Interface'
import { getCredentialData } from './utils'
import OpenAI from 'openai'
import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js'
import { Readable } from 'node:stream'
import type { ReadableStream } from 'node:stream/web'

// Canonical identifiers for the supported text-to-speech providers.
// These values must match the `name` field stored in a chatflow's TTS
// configuration (see the switch in convertTextToSpeechStream / getVoices).
const TextToSpeechType = {
    OPENAI_TTS: 'openai',
    ELEVEN_LABS_TTS: 'elevenlabs'
}

/**
 * Synthesizes `text` into speech with the configured TTS provider and streams
 * the audio back through the supplied callbacks.
 *
 * Resolves once the complete audio stream has been delivered (`onEnd` fired);
 * rejects when TTS is not configured, the provider is unknown, or the provider
 * call / stream fails.
 *
 * @param text - The text to synthesize
 * @param textToSpeechConfig - Provider config (`name`, `credentialId`, `voice`)
 * @param options - Common options forwarded to credential resolution
 * @param onStart - Invoked once with the audio format before any chunk
 * @param onChunk - Invoked for each rate-limited audio chunk
 * @param onEnd - Invoked after the final chunk has been delivered
 */
export const convertTextToSpeechStream = async (
    text: string,
    textToSpeechConfig: ICommonObject,
    options: ICommonObject,
    onStart: (format: string) => void,
    onChunk: (chunk: Buffer) => void,
    onEnd: () => void
): Promise<void> => {
    // Throwing from an async function rejects the returned promise, matching
    // the original reject(...) behavior without the explicit-Promise wrapper.
    if (!textToSpeechConfig) {
        throw new Error('Text to speech is not selected. Please configure TTS in the chatflow.')
    }

    const credentialId = textToSpeechConfig.credentialId as string
    const credentialData = await getCredentialData(credentialId ?? '', options)

    // Bridge the callback-style rate limiter into an awaitable promise.
    const drain = (stream: Readable, rateLimitMs: number): Promise<void> =>
        new Promise<void>((resolve, reject) => {
            processStreamWithRateLimit(stream, onChunk, onEnd, resolve, reject, 640, rateLimitMs)
        })

    switch (textToSpeechConfig.name) {
        case TextToSpeechType.OPENAI_TTS: {
            onStart('mp3')

            const openai = new OpenAI({
                apiKey: credentialData.openAIApiKey
            })

            const response = await openai.audio.speech.create({
                model: 'gpt-4o-mini-tts',
                voice: (textToSpeechConfig.voice || 'alloy') as
                    | 'alloy'
                    | 'ash'
                    | 'ballad'
                    | 'coral'
                    | 'echo'
                    | 'fable'
                    | 'nova'
                    | 'onyx'
                    | 'sage'
                    | 'shimmer',
                input: text,
                response_format: 'mp3'
            })

            const stream = response.body as unknown as Readable
            if (!stream) {
                throw new Error('Failed to get response stream')
            }

            await drain(stream, 20)
            break
        }

        case TextToSpeechType.ELEVEN_LABS_TTS: {
            onStart('mp3')

            const client = new ElevenLabsClient({
                apiKey: credentialData.elevenLabsApiKey
            })

            const response = await client.textToSpeech.stream(textToSpeechConfig.voice || '21m00Tcm4TlvDq8ikWAM', {
                text: text,
                modelId: 'eleven_multilingual_v2'
            })

            // Readable.fromWeb always returns a stream, so no null check needed.
            const stream = Readable.fromWeb(response as unknown as ReadableStream)

            await drain(stream, 40)
            break
        }

        default:
            // BUG FIX: an unknown provider previously fell through the switch
            // silently, leaving the returned promise unsettled forever.
            throw new Error(`Unsupported TTS provider: ${textToSpeechConfig.name}`)
    }
}

/**
 * Drains a Readable stream, re-emitting its bytes as fixed-size chunks at a
 * throttled pace (roughly one chunk per `rateLimitMs`). A trailing partial
 * chunk is flushed after the source ends. Calls `onEnd` then `resolve` on
 * success, or `reject` on a stream error.
 *
 * @param stream - Source audio stream
 * @param onChunk - Receives each rate-limited chunk
 * @param onEnd - Invoked once after the final chunk
 * @param resolve - Settles the caller's promise on success
 * @param reject - Settles the caller's promise on failure
 * @param targetChunkSize - Chunk size in bytes (default 640)
 * @param rateLimitMs - Delay between chunks / idle polls in ms (default 20)
 */
const processStreamWithRateLimit = async (
    stream: Readable,
    onChunk: (chunk: Buffer) => void,
    onEnd: () => void,
    resolve: () => void,
    reject: (error: any) => void,
    targetChunkSize: number = 640,
    rateLimitMs: number = 20
) => {
    let buffer: Buffer = Buffer.alloc(0)
    let isEnded = false
    // BUG FIX: without a failure flag the polling loop below spun forever
    // after a stream 'error' (isEnded never set), leaking CPU/timers and
    // potentially calling onEnd()/resolve() after reject().
    let hasFailed = false

    stream.on('data', (chunk) => {
        buffer = Buffer.concat([buffer, Buffer.from(chunk)])
    })

    stream.on('end', () => {
        isEnded = true
    })

    stream.on('error', (error) => {
        hasFailed = true
        reject(error)
    })

    const pump = async () => {
        while (!hasFailed) {
            if (buffer.length >= targetChunkSize) {
                // Emit a full-size chunk, then pause to honor the rate limit.
                const chunk = buffer.subarray(0, targetChunkSize)
                buffer = buffer.subarray(targetChunkSize)
                onChunk(chunk)
                await sleep(rateLimitMs)
            } else if (isEnded && buffer.length > 0) {
                // Flush the final partial chunk.
                onChunk(buffer)
                buffer = Buffer.alloc(0)
            } else if (!isEnded) {
                // No data yet — poll again after a short sleep.
                await sleep(rateLimitMs)
            } else {
                break
            }
        }
        if (!hasFailed) {
            onEnd()
            resolve()
        }
    }

    pump().catch(reject)
}

// Promise-based delay helper used by the rate limiter above.
const sleep = (ms: number): Promise<void> => {
    return new Promise((resolve) => setTimeout(resolve, ms))
}

/**
 * Lists the selectable voices for a TTS provider.
 *
 * @param provider - One of the TextToSpeechType values
 * @param credentialId - Credential used for providers that need an API call
 * @param options - Common options forwarded to credential resolution
 * @returns Array of `{ id, name }` (Eleven Labs entries also carry `category`)
 * @throws Error when the provider is not supported
 */
export const getVoices = async (provider: string, credentialId: string, options: ICommonObject) => {
    switch (provider) {
        case TextToSpeechType.OPENAI_TTS:
            // OpenAI's TTS voices are a fixed catalog — no credential lookup
            // is needed (previously getCredentialData was awaited even here).
            return [
                { id: 'alloy', name: 'Alloy' },
                { id: 'ash', name: 'Ash' },
                { id: 'ballad', name: 'Ballad' },
                { id: 'coral', name: 'Coral' },
                { id: 'echo', name: 'Echo' },
                { id: 'fable', name: 'Fable' },
                { id: 'nova', name: 'Nova' },
                { id: 'onyx', name: 'Onyx' },
                { id: 'sage', name: 'Sage' },
                { id: 'shimmer', name: 'Shimmer' }
            ]

        case TextToSpeechType.ELEVEN_LABS_TTS: {
            // The credential is only required for the Eleven Labs API call.
            const credentialData = await getCredentialData(credentialId ?? '', options)

            const client = new ElevenLabsClient({
                apiKey: credentialData.elevenLabsApiKey
            })

            const voices = await client.voices.search({
                pageSize: 100,
                voiceType: 'default',
                category: 'premade'
            })

            return voices.voices.map((voice) => ({
                id: voice.voiceId,
                name: voice.name,
                category: voice.category
            }))
        }

        default:
            throw new Error(`Unsupported TTS provider: ${provider}`)
    }
}
1 change: 1 addition & 0 deletions packages/server/src/Interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ export interface IChatFlow {
apikeyid?: string
analytic?: string
speechToText?: string
textToSpeech?: string
chatbotConfig?: string
followUpPrompts?: string
apiConfig?: string
Expand Down
21 changes: 21 additions & 0 deletions packages/server/src/controllers/internal-predictions/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { utilBuildChatflow } from '../../utils/buildChatflow'
import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
import { getErrorMessage } from '../../errors/utils'
import { MODE } from '../../Interface'
import { generateTTSForResponseStream, shouldAutoPlayTTS } from '../../utils/buildChatflow'

// Send input message and get prediction result (Internal)
const createInternalPrediction = async (req: Request, res: Response, next: NextFunction) => {
Expand Down Expand Up @@ -38,6 +39,26 @@ const createAndStreamInternalPrediction = async (req: Request, res: Response, ne

const apiResponse = await utilBuildChatflow(req, true)
sseStreamer.streamMetadataEvent(apiResponse.chatId, apiResponse)

const chatflow = req.body.chatflow || req.body
if (shouldAutoPlayTTS(chatflow.textToSpeech) && apiResponse.text) {
const options = {
orgId: req.body.orgId || '',
chatflowid: req.body.chatflowid || '',
chatId: apiResponse.chatId,
appDataSource: getRunningExpressApp().AppDataSource,
databaseEntities: getRunningExpressApp().AppDataSource?.entityMetadatas || []
}

await generateTTSForResponseStream(
apiResponse.text,
chatflow.textToSpeech,
options,
apiResponse.chatId,
apiResponse.chatMessageId,
sseStreamer
)
}
} catch (error) {
if (chatId) {
sseStreamer.streamErrorEvent(chatId, getErrorMessage(error))
Expand Down
21 changes: 21 additions & 0 deletions packages/server/src/controllers/predictions/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
import { v4 as uuidv4 } from 'uuid'
import { getErrorMessage } from '../../errors/utils'
import { MODE } from '../../Interface'
import { generateTTSForResponseStream, shouldAutoPlayTTS } from '../../utils/buildChatflow'

// Send input message and get prediction result (External)
const createPrediction = async (req: Request, res: Response, next: NextFunction) => {
Expand Down Expand Up @@ -76,6 +77,26 @@ const createPrediction = async (req: Request, res: Response, next: NextFunction)

const apiResponse = await predictionsServices.buildChatflow(req)
sseStreamer.streamMetadataEvent(apiResponse.chatId, apiResponse)

const chatflow = await chatflowsService.getChatflowById(req.params.id)
if (chatflow && shouldAutoPlayTTS(chatflow.textToSpeech) && apiResponse.text) {
const options = {
orgId: req.body.orgId || '',
chatflowid: req.params.id,
chatId: apiResponse.chatId,
appDataSource: getRunningExpressApp().AppDataSource,
databaseEntities: getRunningExpressApp().AppDataSource?.entityMetadatas || []
}

await generateTTSForResponseStream(
apiResponse.text,
chatflow.textToSpeech,
options,
apiResponse.chatId,
apiResponse.chatMessageId,
sseStreamer
)
}
} catch (error) {
if (chatId) {
sseStreamer.streamErrorEvent(chatId, getErrorMessage(error))
Expand Down
Loading