Skip to content

Commit 67aef88

Browse files
committed
fix: resample wav from fish, groq
1 parent 43605ec commit 67aef88

File tree

1 file changed

+43
-6
lines changed

1 file changed

+43
-6
lines changed

src/services/ws/stable/tts.rs

Lines changed: 43 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ async fn gsv_stable_tts(
216216
text: &str,
217217
tts_resp_tx: &TTSResponseTx,
218218
) -> anyhow::Result<()> {
219-
let bytes = retry_gsv_tts(
219+
let wav_data = retry_gsv_tts(
220220
client,
221221
&tts.url,
222222
&tts.speaker,
@@ -227,7 +227,7 @@ async fn gsv_stable_tts(
227227
)
228228
.await?;
229229

230-
tts_resp_tx.send(bytes.to_vec())?;
230+
send_wav(tts_resp_tx, wav_data).await?;
231231
Ok(())
232232
}
233233

@@ -317,17 +317,17 @@ async fn groq_tts(
317317
text: &str,
318318
tts_resp_tx: &TTSResponseTx,
319319
) -> anyhow::Result<()> {
320-
let bytes =
320+
let wav_data =
321321
crate::ai::tts::groq(client, &tts.url, &tts.model, &tts.api_key, &tts.voice, text).await?;
322322

323-
tts_resp_tx.send(bytes.to_vec())?;
323+
send_wav(tts_resp_tx, wav_data).await?;
324324
Ok(())
325325
}
326326

327327
async fn fish_tts(tts: &FishTTS, text: &str, tts_resp_tx: &TTSResponseTx) -> anyhow::Result<()> {
328-
let bytes = crate::ai::tts::fish_tts(&tts.api_key, &tts.speaker, text).await?;
328+
let wav_data = crate::ai::tts::fish_tts(&tts.api_key, &tts.speaker, text).await?;
329329

330-
tts_resp_tx.send(bytes.to_vec())?;
330+
send_wav(tts_resp_tx, wav_data).await?;
331331
Ok(())
332332
}
333333

@@ -378,3 +378,40 @@ async fn elevenlabs_tts(
378378

379379
Ok(())
380380
}
381+
382+
async fn send_wav(tts_resp_tx: &TTSResponseTx, wav_data: Bytes) -> anyhow::Result<()> {
383+
let mut reader = wav_io::reader::Reader::from_vec(wav_data.into())
384+
.map_err(|e| anyhow::anyhow!("wav_io reader error: {e}"))?;
385+
386+
let header = reader.read_header()?;
387+
let mut samples = crate::util::get_samples_f32(&mut reader)
388+
.map_err(|e| anyhow::anyhow!("get_samples_f32 error: {e}"))?;
389+
390+
let out_hz = 16000;
391+
392+
if header.sample_rate != out_hz {
393+
// resample to 16000
394+
log::debug!("resampling from {} to 16000", header.sample_rate);
395+
samples = wav_io::resample::linear(samples, header.channels, header.sample_rate, out_hz);
396+
}
397+
let audio_16k = wav_io::convert_samples_f32_to_i16(&samples);
398+
399+
for chunk in audio_16k.chunks(5 * out_hz as usize / 10) {
400+
let buff = if cfg!(target_endian = "big") {
401+
let mut buff = Vec::with_capacity(chunk.len() * 2);
402+
for i in chunk {
403+
buff.extend_from_slice(&i.to_le_bytes());
404+
}
405+
buff
406+
} else {
407+
let chunk_bytes =
408+
unsafe { std::slice::from_raw_parts(chunk.as_ptr() as *const u8, chunk.len() * 2) };
409+
chunk_bytes.to_vec()
410+
};
411+
412+
// std::mem::swap(&mut send_data, &mut buff);
413+
tts_resp_tx.send(buff)?;
414+
}
415+
416+
Ok(())
417+
}

0 commit comments

Comments
 (0)