Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -127,5 +127,8 @@
}
]
]
},
"dependencies": {
"@elevenlabs/elevenlabs-js": "^2.8.0"
}
}
26 changes: 26 additions & 0 deletions packages/components/credentials/ElevenLabsApi.credential.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import { INodeParams, INodeCredential } from '../src/Interface'

/**
 * Credential definition for the Eleven Labs text-to-speech API.
 * Exposes a single masked input holding the user's Eleven Labs API key.
 */
class ElevenLabsApi implements INodeCredential {
    label: string
    name: string
    version: number
    description: string
    inputs: INodeParams[]

    constructor() {
        this.label = 'Eleven Labs API'
        this.name = 'elevenLabsApi'
        this.version = 1.0
        // User-facing hint rendered in the credential dialog.
        // ("an Eleven Labs" — fixed article from the original "a Eleven Labs".)
        this.description =
            'Sign up for an Eleven Labs account and <a target="_blank" href="https://elevenlabs.io/app/settings/api-keys">create an API Key</a>.'
        this.inputs = [
            {
                label: 'Eleven Labs API Key',
                name: 'elevenLabsApiKey',
                // 'password' masks the value in the UI and stores it as a secret
                type: 'password'
            }
        ]
    }
}

module.exports = { credClass: ElevenLabsApi }
1 change: 1 addition & 0 deletions packages/components/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"@dqbd/tiktoken": "^1.0.21",
"@e2b/code-interpreter": "^1.5.1",
"@elastic/elasticsearch": "^8.9.0",
"@elevenlabs/elevenlabs-js": "^2.8.0",
"@flowiseai/nodevm": "^3.9.25",
"@getzep/zep-cloud": "~1.0.7",
"@getzep/zep-js": "^0.9.0",
Expand Down
3 changes: 3 additions & 0 deletions packages/components/src/Interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,9 @@ export interface IServerSideEventStreamer {
streamAbortEvent(chatId: string): void
streamEndEvent(chatId: string): void
streamUsageMetadataEvent(chatId: string, data: any): void
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void
streamTTSEndEvent(chatId: string, chatMessageId: string): void
}

export enum FollowUpPromptProvider {
Expand Down
1 change: 1 addition & 0 deletions packages/components/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ dotenv.config({ path: envPath, override: true })
export * from './Interface'
export * from './utils'
export * from './speechToText'
export * from './textToSpeech'
export * from './storageUtils'
export * from './handler'
export * from '../evaluation/EvaluationRunner'
Expand Down
189 changes: 189 additions & 0 deletions packages/components/src/textToSpeech.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
import { ICommonObject } from './Interface'
import { getCredentialData } from './utils'
import OpenAI from 'openai'
import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js'
import { Readable } from 'node:stream'
import type { ReadableStream } from 'node:stream/web'

// Canonical identifiers for the supported text-to-speech providers.
// These values must match the `name` field stored in a chatflow's TTS
// configuration (see the switch in convertTextToSpeechStream / getVoices).
const TextToSpeechType = {
    OPENAI_TTS: 'openai',
    ELEVEN_LABS_TTS: 'elevenlabs'
}

/**
 * Synthesizes `text` into speech with the configured TTS provider and streams
 * the audio back through the supplied callbacks.
 *
 * Resolves once the complete audio stream has been delivered (`onEnd` fired);
 * rejects when TTS is not configured, the provider is unknown, or the provider
 * call / stream fails.
 *
 * @param text - The text to synthesize
 * @param textToSpeechConfig - Provider config (`name`, `credentialId`, `voice`)
 * @param options - Common options forwarded to credential resolution
 * @param onStart - Invoked once with the audio format before any chunk
 * @param onChunk - Invoked for each rate-limited audio chunk
 * @param onEnd - Invoked after the final chunk has been delivered
 */
export const convertTextToSpeechStream = async (
    text: string,
    textToSpeechConfig: ICommonObject,
    options: ICommonObject,
    onStart: (format: string) => void,
    onChunk: (chunk: Buffer) => void,
    onEnd: () => void
): Promise<void> => {
    // Throwing from an async function rejects the returned promise, matching
    // the original reject(...) behavior without the explicit-Promise wrapper.
    if (!textToSpeechConfig) {
        throw new Error('Text to speech is not selected. Please configure TTS in the chatflow.')
    }

    const credentialId = textToSpeechConfig.credentialId as string
    const credentialData = await getCredentialData(credentialId ?? '', options)

    // Bridge the callback-style rate limiter into an awaitable promise.
    const drain = (stream: Readable, rateLimitMs: number): Promise<void> =>
        new Promise<void>((resolve, reject) => {
            processStreamWithRateLimit(stream, onChunk, onEnd, resolve, reject, 640, rateLimitMs)
        })

    switch (textToSpeechConfig.name) {
        case TextToSpeechType.OPENAI_TTS: {
            onStart('mp3')

            const openai = new OpenAI({
                apiKey: credentialData.openAIApiKey
            })

            const response = await openai.audio.speech.create({
                model: 'gpt-4o-mini-tts',
                voice: (textToSpeechConfig.voice || 'alloy') as
                    | 'alloy'
                    | 'ash'
                    | 'ballad'
                    | 'coral'
                    | 'echo'
                    | 'fable'
                    | 'nova'
                    | 'onyx'
                    | 'sage'
                    | 'shimmer',
                input: text,
                response_format: 'mp3'
            })

            const stream = response.body as unknown as Readable
            if (!stream) {
                throw new Error('Failed to get response stream')
            }

            await drain(stream, 20)
            break
        }

        case TextToSpeechType.ELEVEN_LABS_TTS: {
            onStart('mp3')

            const client = new ElevenLabsClient({
                apiKey: credentialData.elevenLabsApiKey
            })

            const response = await client.textToSpeech.stream(textToSpeechConfig.voice || '21m00Tcm4TlvDq8ikWAM', {
                text: text,
                modelId: 'eleven_multilingual_v2'
            })

            // Readable.fromWeb always returns a stream, so no null check needed.
            const stream = Readable.fromWeb(response as unknown as ReadableStream)

            await drain(stream, 40)
            break
        }

        default:
            // BUG FIX: an unknown provider previously fell through the switch
            // silently, leaving the returned promise unsettled forever.
            throw new Error(`Unsupported TTS provider: ${textToSpeechConfig.name}`)
    }
}

/**
 * Drains a Readable stream, re-emitting its bytes as fixed-size chunks at a
 * throttled pace (roughly one chunk per `rateLimitMs`). A trailing partial
 * chunk is flushed after the source ends. Calls `onEnd` then `resolve` on
 * success, or `reject` on a stream error.
 *
 * @param stream - Source audio stream
 * @param onChunk - Receives each rate-limited chunk
 * @param onEnd - Invoked once after the final chunk
 * @param resolve - Settles the caller's promise on success
 * @param reject - Settles the caller's promise on failure
 * @param targetChunkSize - Chunk size in bytes (default 640)
 * @param rateLimitMs - Delay between chunks / idle polls in ms (default 20)
 */
const processStreamWithRateLimit = async (
    stream: Readable,
    onChunk: (chunk: Buffer) => void,
    onEnd: () => void,
    resolve: () => void,
    reject: (error: any) => void,
    targetChunkSize: number = 640,
    rateLimitMs: number = 20
) => {
    let buffer: Buffer = Buffer.alloc(0)
    let isEnded = false
    // BUG FIX: without a failure flag the polling loop below spun forever
    // after a stream 'error' (isEnded never set), leaking CPU/timers and
    // potentially calling onEnd()/resolve() after reject().
    let hasFailed = false

    stream.on('data', (chunk) => {
        buffer = Buffer.concat([buffer, Buffer.from(chunk)])
    })

    stream.on('end', () => {
        isEnded = true
    })

    stream.on('error', (error) => {
        hasFailed = true
        reject(error)
    })

    const pump = async () => {
        while (!hasFailed) {
            if (buffer.length >= targetChunkSize) {
                // Emit a full-size chunk, then pause to honor the rate limit.
                const chunk = buffer.subarray(0, targetChunkSize)
                buffer = buffer.subarray(targetChunkSize)
                onChunk(chunk)
                await sleep(rateLimitMs)
            } else if (isEnded && buffer.length > 0) {
                // Flush the final partial chunk.
                onChunk(buffer)
                buffer = Buffer.alloc(0)
            } else if (!isEnded) {
                // No data yet — poll again after a short sleep.
                await sleep(rateLimitMs)
            } else {
                break
            }
        }
        if (!hasFailed) {
            onEnd()
            resolve()
        }
    }

    pump().catch(reject)
}

// Promise-based delay helper used by the rate limiter above.
const sleep = (ms: number): Promise<void> => {
    return new Promise((resolve) => setTimeout(resolve, ms))
}

/**
 * Lists the selectable voices for a TTS provider.
 *
 * @param provider - One of the TextToSpeechType values
 * @param credentialId - Credential used for providers that need an API call
 * @param options - Common options forwarded to credential resolution
 * @returns Array of `{ id, name }` (Eleven Labs entries also carry `category`)
 * @throws Error when the provider is not supported
 */
export const getVoices = async (provider: string, credentialId: string, options: ICommonObject) => {
    switch (provider) {
        case TextToSpeechType.OPENAI_TTS:
            // OpenAI's TTS voices are a fixed catalog — no credential lookup
            // is needed (previously getCredentialData was awaited even here).
            return [
                { id: 'alloy', name: 'Alloy' },
                { id: 'ash', name: 'Ash' },
                { id: 'ballad', name: 'Ballad' },
                { id: 'coral', name: 'Coral' },
                { id: 'echo', name: 'Echo' },
                { id: 'fable', name: 'Fable' },
                { id: 'nova', name: 'Nova' },
                { id: 'onyx', name: 'Onyx' },
                { id: 'sage', name: 'Sage' },
                { id: 'shimmer', name: 'Shimmer' }
            ]

        case TextToSpeechType.ELEVEN_LABS_TTS: {
            // The credential is only required for the Eleven Labs API call.
            const credentialData = await getCredentialData(credentialId ?? '', options)

            const client = new ElevenLabsClient({
                apiKey: credentialData.elevenLabsApiKey
            })

            const voices = await client.voices.search({
                pageSize: 100,
                voiceType: 'default',
                category: 'premade'
            })

            return voices.voices.map((voice) => ({
                id: voice.voiceId,
                name: voice.name,
                category: voice.category
            }))
        }

        default:
            throw new Error(`Unsupported TTS provider: ${provider}`)
    }
}
1 change: 1 addition & 0 deletions packages/server/src/Interface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ export interface IChatFlow {
apikeyid?: string
analytic?: string
speechToText?: string
textToSpeech?: string
chatbotConfig?: string
followUpPrompts?: string
apiConfig?: string
Expand Down
21 changes: 21 additions & 0 deletions packages/server/src/controllers/internal-predictions/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { utilBuildChatflow } from '../../utils/buildChatflow'
import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
import { getErrorMessage } from '../../errors/utils'
import { MODE } from '../../Interface'
import { generateTTSForResponseStream, shouldAutoPlayTTS } from '../../utils/buildChatflow'

// Send input message and get prediction result (Internal)
const createInternalPrediction = async (req: Request, res: Response, next: NextFunction) => {
Expand Down Expand Up @@ -38,6 +39,26 @@ const createAndStreamInternalPrediction = async (req: Request, res: Response, ne

const apiResponse = await utilBuildChatflow(req, true)
sseStreamer.streamMetadataEvent(apiResponse.chatId, apiResponse)

const chatflow = req.body.chatflow || req.body
if (shouldAutoPlayTTS(chatflow.textToSpeech) && apiResponse.text) {
const options = {
orgId: req.body.orgId || '',
chatflowid: req.body.chatflowid || '',
chatId: apiResponse.chatId,
appDataSource: getRunningExpressApp().AppDataSource,
databaseEntities: getRunningExpressApp().AppDataSource?.entityMetadatas || []
}

await generateTTSForResponseStream(
apiResponse.text,
chatflow.textToSpeech,
options,
apiResponse.chatId,
apiResponse.chatMessageId,
sseStreamer
)
}
} catch (error) {
if (chatId) {
sseStreamer.streamErrorEvent(chatId, getErrorMessage(error))
Expand Down
21 changes: 21 additions & 0 deletions packages/server/src/controllers/predictions/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
import { v4 as uuidv4 } from 'uuid'
import { getErrorMessage } from '../../errors/utils'
import { MODE } from '../../Interface'
import { generateTTSForResponseStream, shouldAutoPlayTTS } from '../../utils/buildChatflow'

// Send input message and get prediction result (External)
const createPrediction = async (req: Request, res: Response, next: NextFunction) => {
Expand Down Expand Up @@ -76,6 +77,26 @@ const createPrediction = async (req: Request, res: Response, next: NextFunction)

const apiResponse = await predictionsServices.buildChatflow(req)
sseStreamer.streamMetadataEvent(apiResponse.chatId, apiResponse)

const chatflow = await chatflowsService.getChatflowById(req.params.id)
if (chatflow && shouldAutoPlayTTS(chatflow.textToSpeech) && apiResponse.text) {
const options = {
orgId: req.body.orgId || '',
chatflowid: req.params.id,
chatId: apiResponse.chatId,
appDataSource: getRunningExpressApp().AppDataSource,
databaseEntities: getRunningExpressApp().AppDataSource?.entityMetadatas || []
}

await generateTTSForResponseStream(
apiResponse.text,
chatflow.textToSpeech,
options,
apiResponse.chatId,
apiResponse.chatMessageId,
sseStreamer
)
}
} catch (error) {
if (chatId) {
sseStreamer.streamErrorEvent(chatId, getErrorMessage(error))
Expand Down
Loading