@@ -76,7 +76,7 @@ pub struct ElevenlabsTTS {
7676 websocket : WebSocket ,
7777}
7878
79- const MODEL_ID : & str = "eleven_flash_v2_5" ;
// Model id used as the fallback when a caller passes an empty `model_id`
// to `new_with_client`.
79+ const DEFAULT_MODEL_ID : & str = "eleven_flash_v2_5" ;
8080
8181pub enum OutputFormat {
8282 Pcm16000 ,
@@ -97,21 +97,47 @@ impl ElevenlabsTTS {
9797 token : String ,
9898 voice : String ,
9999 output_format : OutputFormat ,
100+ model_id : & str ,
101+ language_code : & str ,
100102 ) -> anyhow:: Result < Self > {
101103 let client = reqwest:: Client :: new ( ) ;
102- Self :: new_with_client ( & client, token, voice, output_format) . await
104+ Self :: new_with_client (
105+ & client,
106+ token,
107+ voice,
108+ output_format,
109+ model_id,
110+ language_code,
111+ )
112+ . await
103113 }
104114
105115 pub async fn new_with_client (
106116 client : & reqwest:: Client ,
107117 token : String ,
108118 voice : String ,
109119 output_format : OutputFormat ,
120+ model_id : & str ,
121+ language_code : & str ,
110122 ) -> anyhow:: Result < Self > {
111- let url = format ! (
112- "wss://api.elevenlabs.io/v1/text-to-speech/{voice}/stream-input?model_id={MODEL_ID}&output_format={output_format}" ,
123+ let model_id = if model_id. is_empty ( ) {
124+ DEFAULT_MODEL_ID
125+ } else {
126+ model_id
127+ } ;
128+
129+ let language_code = language_code. to_ascii_lowercase ( ) ;
130+
131+ let mut url = format ! (
132+ "wss://api.elevenlabs.io/v1/text-to-speech/{voice}/stream-input?model_id={model_id}&output_format={output_format}" ,
113133 ) ;
114134
135+ if !language_code. is_empty ( ) {
136+ url. push_str ( & format ! ( "&language_code={}" , language_code) ) ;
137+ }
138+
139+ log:: debug!( "Connect Elevenlabs TTS WebSocket URL: {}" , url) ;
140+
115141 let response = client
116142 . get ( url)
117143 . header ( "xi-api-key" , & token)
@@ -217,7 +243,7 @@ async fn test_elevenlabs_tts() {
217243 let token = std:: env:: var ( "ELEVENLABS_API_KEY" ) . unwrap ( ) ;
218244 let voice = std:: env:: var ( "ELEVENLABS_VOICE_ID" ) . unwrap ( ) ;
219245
220- let mut tts = ElevenlabsTTS :: new ( token, voice, OutputFormat :: Pcm16000 )
246+ let mut tts = ElevenlabsTTS :: new ( token, voice, OutputFormat :: Pcm16000 , "" , "" )
221247 . await
222248 . expect ( "Failed to create ElevenlabsTTS" ) ;
223249
@@ -248,3 +274,48 @@ async fn test_elevenlabs_tts() {
248274 ) ;
249275 std:: fs:: write ( "./resources/test/elevenlabs_out.wav" , wav) . unwrap ( ) ;
250276}
277+
278+ // cargo test --package echokit_server --bin echokit_server -- ai::elevenlabs::tts::test_elevenlabs_tts_with_language_code --exact --show-output
279+ #[ tokio:: test]
280+ async fn test_elevenlabs_tts_with_language_code ( ) {
281+ env_logger:: init ( ) ;
282+ let token = std:: env:: var ( "ELEVENLABS_API_KEY" ) . unwrap ( ) ;
283+ let voice = std:: env:: var ( "ELEVENLABS_VOICE_ID" ) . unwrap ( ) ;
284+
285+ let mut tts = ElevenlabsTTS :: new (
286+ token,
287+ voice,
288+ OutputFormat :: Pcm16000 ,
289+ "eleven_multilingual_v2" ,
290+ "ZH" ,
291+ )
292+ . await
293+ . expect ( "Failed to create ElevenlabsTTS" ) ;
294+
295+ tts. send_text ( "你好,这里是 elevenlabs TTS 的测试。" , true )
296+ . await
297+ . expect ( "Failed to send text" ) ;
298+
299+ tts. close_connection ( )
300+ . await
301+ . expect ( "Failed to close connection" ) ;
302+
303+ let mut samples = Vec :: new ( ) ;
304+
305+ while let Ok ( Some ( resp) ) = tts. next_audio_response ( ) . await {
306+ if let Some ( audio) = resp. get_audio_bytes ( ) {
307+ println ! ( "Received audio chunk of size: {}" , audio. len( ) ) ;
308+ samples. extend_from_slice ( & audio) ;
309+ }
310+ }
311+
312+ let wav = crate :: util:: pcm_to_wav (
313+ & samples,
314+ crate :: util:: WavConfig {
315+ channels : 1 ,
316+ sample_rate : 16000 ,
317+ bits_per_sample : 16 ,
318+ } ,
319+ ) ;
320+ std:: fs:: write ( "./resources/test/elevenlabs_out.zh.wav" , wav) . unwrap ( ) ;
321+ }
0 commit comments