Skip to content

Commit 38b0310

Browse files
committed
feat: support set elevenlabs tts model_id & language_code
1 parent: cba77be · commit: 38b0310

File tree

5 files changed: +86 -5 lines changed

5 files changed: +86 -5 lines changed

src/ai/elevenlabs/tts.rs

Lines changed: 76 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ pub struct ElevenlabsTTS {
7676
websocket: WebSocket,
7777
}
7878

79-
const MODEL_ID: &str = "eleven_flash_v2_5";
79+
const DEFAULT_MODEL_ID: &str = "eleven_flash_v2_5";
8080

8181
pub enum OutputFormat {
8282
Pcm16000,
@@ -97,21 +97,47 @@ impl ElevenlabsTTS {
9797
token: String,
9898
voice: String,
9999
output_format: OutputFormat,
100+
model_id: &str,
101+
language_code: &str,
100102
) -> anyhow::Result<Self> {
101103
let client = reqwest::Client::new();
102-
Self::new_with_client(&client, token, voice, output_format).await
104+
Self::new_with_client(
105+
&client,
106+
token,
107+
voice,
108+
output_format,
109+
model_id,
110+
language_code,
111+
)
112+
.await
103113
}
104114

105115
pub async fn new_with_client(
106116
client: &reqwest::Client,
107117
token: String,
108118
voice: String,
109119
output_format: OutputFormat,
120+
model_id: &str,
121+
language_code: &str,
110122
) -> anyhow::Result<Self> {
111-
let url = format!(
112-
"wss://api.elevenlabs.io/v1/text-to-speech/{voice}/stream-input?model_id={MODEL_ID}&output_format={output_format}",
123+
let model_id = if model_id.is_empty() {
124+
DEFAULT_MODEL_ID
125+
} else {
126+
model_id
127+
};
128+
129+
let language_code = language_code.to_ascii_lowercase();
130+
131+
let mut url = format!(
132+
"wss://api.elevenlabs.io/v1/text-to-speech/{voice}/stream-input?model_id={model_id}&output_format={output_format}",
113133
);
114134

135+
if !language_code.is_empty() {
136+
url.push_str(&format!("&language_code={}", language_code));
137+
}
138+
139+
log::debug!("Connect Elevenlabs TTS WebSocket URL: {}", url);
140+
115141
let response = client
116142
.get(url)
117143
.header("xi-api-key", &token)
@@ -217,7 +243,7 @@ async fn test_elevenlabs_tts() {
217243
let token = std::env::var("ELEVENLABS_API_KEY").unwrap();
218244
let voice = std::env::var("ELEVENLABS_VOICE_ID").unwrap();
219245

220-
let mut tts = ElevenlabsTTS::new(token, voice, OutputFormat::Pcm16000)
246+
let mut tts = ElevenlabsTTS::new(token, voice, OutputFormat::Pcm16000, "", "")
221247
.await
222248
.expect("Failed to create ElevenlabsTTS");
223249

@@ -248,3 +274,48 @@ async fn test_elevenlabs_tts() {
248274
);
249275
std::fs::write("./resources/test/elevenlabs_out.wav", wav).unwrap();
250276
}
277+
278+
// cargo test --package echokit_server --bin echokit_server -- ai::elevenlabs::tts::test_elevenlabs_tts_with_language_code --exact --show-output
279+
#[tokio::test]
280+
async fn test_elevenlabs_tts_with_language_code() {
281+
env_logger::init();
282+
let token = std::env::var("ELEVENLABS_API_KEY").unwrap();
283+
let voice = std::env::var("ELEVENLABS_VOICE_ID").unwrap();
284+
285+
let mut tts = ElevenlabsTTS::new(
286+
token,
287+
voice,
288+
OutputFormat::Pcm16000,
289+
"eleven_multilingual_v2",
290+
"ZH",
291+
)
292+
.await
293+
.expect("Failed to create ElevenlabsTTS");
294+
295+
tts.send_text("你好,这里是 elevenlabs TTS 的测试。", true)
296+
.await
297+
.expect("Failed to send text");
298+
299+
tts.close_connection()
300+
.await
301+
.expect("Failed to close connection");
302+
303+
let mut samples = Vec::new();
304+
305+
while let Ok(Some(resp)) = tts.next_audio_response().await {
306+
if let Some(audio) = resp.get_audio_bytes() {
307+
println!("Received audio chunk of size: {}", audio.len());
308+
samples.extend_from_slice(&audio);
309+
}
310+
}
311+
312+
let wav = crate::util::pcm_to_wav(
313+
&samples,
314+
crate::util::WavConfig {
315+
channels: 1,
316+
sample_rate: 16000,
317+
bits_per_sample: 16,
318+
},
319+
);
320+
std::fs::write("./resources/test/elevenlabs_out.zh.wav", wav).unwrap();
321+
}

src/config.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,10 @@ pub struct CosyVoiceTTS {
131131
pub struct ElevenlabsTTS {
132132
pub token: String,
133133
pub voice: String,
134+
#[serde(default)]
135+
pub model_id: String,
136+
#[serde(default)]
137+
pub language_code: String,
134138
}
135139

136140
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]

src/services/realtime_ws.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1258,6 +1258,8 @@ async fn tts_and_send(
12581258
elevenlabs_tts.token.clone(),
12591259
elevenlabs_tts.voice.clone(),
12601260
elevenlabs::tts::OutputFormat::Pcm24000,
1261+
&elevenlabs_tts.model_id,
1262+
&elevenlabs_tts.language_code,
12611263
)
12621264
.await
12631265
.map_err(|e| anyhow::anyhow!("Elevenlabs TTS init error: {e}"))?;

src/services/ws.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -397,6 +397,8 @@ async fn tts_and_send(
397397
elevenlabs_tts.token.clone(),
398398
elevenlabs_tts.voice.clone(),
399399
elevenlabs::tts::OutputFormat::Pcm16000,
400+
&elevenlabs_tts.model_id,
401+
&elevenlabs_tts.language_code,
400402
)
401403
.await
402404
.map_err(|e| anyhow::anyhow!("Elevenlabs TTS init error: {e}"))?;

src/services/ws/stable/tts.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -387,6 +387,8 @@ async fn elevenlabs_tts(
387387
elevenlabs_tts.token.clone(),
388388
elevenlabs_tts.voice.clone(),
389389
crate::ai::elevenlabs::tts::OutputFormat::Pcm16000,
390+
&elevenlabs_tts.model_id,
391+
&elevenlabs_tts.language_code,
390392
)
391393
.await?;
392394

0 commit comments

Comments
 (0)