Skip to content

Commit 684a1e8

Browse files
committed
fix: new function get_samples_f32 support Infinite length
1 parent fef9fd4 commit 684a1e8

File tree

2 files changed

+220
-1
lines changed

2 files changed

+220
-1
lines changed

src/ai/tts.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ pub async fn groq(model: &str, token: &str, voice: &str, text: &str) -> anyhow::
9696
Ok(bytes)
9797
}
9898

99-
// cargo test --package esp_assistant --bin esp_assistant -- ai::tts:test_groq --exact --show-output
99+
// cargo test --package echokit_server --bin echokit_server -- ai::tts::test_groq --exact --show-output
100100
#[tokio::test]
101101
async fn test_groq() {
102102
let token = std::env::var("GROQ_API_KEY").unwrap();
@@ -107,6 +107,8 @@ async fn test_groq() {
107107
let head = reader.read_header().unwrap();
108108
println!("wav header: {:?}", head);
109109
std::fs::write("./resources/test/groq_out.wav", wav_audio).unwrap();
110+
let samples = crate::util::get_samples_f32(&mut reader).unwrap();
111+
println!("samples len: {}", samples.len());
110112
}
111113

112114
#[derive(Debug, serde::Serialize)]

src/util.rs

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use std::io::{Cursor, Write};
22

3+
use wav_io::{header::SampleFormat, reader::DecodeError};
4+
35
/// WAV 音频参数结构体
46
#[derive(Debug, Clone)]
57
pub struct WavConfig {
@@ -62,3 +64,218 @@ pub fn convert_samples_f32_to_i16_bytes(samples: &[f32]) -> Vec<u8> {
6264
}
6365
samples_i16
6466
}
67+
68+
pub fn get_samples_f32(reader: &mut wav_io::reader::Reader) -> Result<Vec<f32>, DecodeError> {
69+
let mut result: Vec<f32> = Vec::new();
70+
loop {
71+
// read chunks
72+
let chunk_tag = reader.read_str4();
73+
if chunk_tag == "" {
74+
break;
75+
}
76+
let size = reader.read_u32().unwrap_or(0) as u64;
77+
// todo: check tag
78+
// println!("[info] tag={:?}::{}", chunk_tag, size);
79+
if size == 0 {
80+
continue;
81+
}
82+
// data?
83+
if chunk_tag != "data" {
84+
reader.cur.set_position(reader.cur.position() + size);
85+
continue;
86+
}
87+
// read wav data
88+
let h = &reader.header.clone().unwrap();
89+
90+
let bytes_to_read = if size == 0xFFFFFFFF {
91+
let current_pos = reader.cur.position();
92+
let file_len = reader.cur.get_ref().len() as u64;
93+
file_len.saturating_sub(current_pos)
94+
} else {
95+
size
96+
};
97+
98+
let bytes_per_sample = (h.bits_per_sample / 8) as u64;
99+
let total_samples = bytes_to_read / bytes_per_sample;
100+
if result.is_empty() {
101+
result = Vec::with_capacity(total_samples as usize);
102+
}
103+
104+
match h.sample_format {
105+
// float
106+
SampleFormat::Float => {
107+
match h.bits_per_sample {
108+
32 => {
109+
for _ in 0..total_samples {
110+
let lv = reader.read_f32().unwrap_or(0.0);
111+
result.push(lv);
112+
}
113+
}
114+
64 => {
115+
for _ in 0..total_samples {
116+
let lv = reader.read_f64().unwrap_or(0.0);
117+
result.push(lv as f32); // down to f32
118+
}
119+
}
120+
_ => {
121+
return Err(DecodeError::UnsupportedWav {
122+
attribute: "bits per float sample",
123+
expected: &[32, 64],
124+
found: h.bits_per_sample as u32,
125+
})
126+
}
127+
}
128+
}
129+
// int
130+
SampleFormat::Int => {
131+
match h.bits_per_sample {
132+
8 => {
133+
for _ in 0..total_samples {
134+
// 0..255
135+
let lv = reader.read_u8().unwrap_or(0);
136+
let fv = lv.wrapping_sub(128) as i8 as f32 / (i8::MAX as f32);
137+
result.push(fv);
138+
}
139+
}
140+
16 => {
141+
for _ in 0..total_samples {
142+
let lv = reader.read_i16().unwrap_or(0);
143+
let fv = lv as f32 / (i16::MAX as f32);
144+
result.push(fv);
145+
}
146+
}
147+
24 => {
148+
for _ in 0..total_samples {
149+
let lv = reader.read_i24().unwrap_or(0);
150+
let fv = lv as f32 / ((1 << 23) - 1) as f32;
151+
result.push(fv);
152+
}
153+
}
154+
32 => {
155+
for _ in 0..total_samples {
156+
let lv = reader.read_i32().unwrap_or(0);
157+
let fv = lv as f32 / (i32::MAX as f32);
158+
result.push(fv);
159+
}
160+
}
161+
_ => {
162+
return Err(DecodeError::UnsupportedWav {
163+
attribute: "bits per integer sample",
164+
expected: &[8, 16, 24, 32],
165+
found: h.bits_per_sample as u32,
166+
})
167+
}
168+
}
169+
}
170+
_ => return Err(DecodeError::UnsupportedEncoding),
171+
}
172+
}
173+
Ok(result)
174+
}
175+
176+
pub fn get_samples_i16(reader: &mut wav_io::reader::Reader) -> Result<Vec<i16>, DecodeError> {
177+
let mut result: Vec<i16> = Vec::new();
178+
loop {
179+
// read chunks
180+
let chunk_tag = reader.read_str4();
181+
if chunk_tag == "" {
182+
break;
183+
}
184+
let size = reader.read_u32().unwrap_or(0) as u64;
185+
// todo: check tag
186+
// println!("[info] tag={:?}::{}", chunk_tag, size);
187+
if size == 0 {
188+
continue;
189+
}
190+
// data?
191+
if chunk_tag != "data" {
192+
reader.cur.set_position(reader.cur.position() + size);
193+
continue;
194+
}
195+
// read wav data
196+
let h = &reader.header.clone().unwrap();
197+
198+
let bytes_to_read = if size == 0xFFFFFFFF {
199+
let current_pos = reader.cur.position();
200+
let file_len = reader.cur.get_ref().len() as u64;
201+
file_len.saturating_sub(current_pos)
202+
} else {
203+
size
204+
};
205+
206+
let bytes_per_sample = (h.bits_per_sample / 8) as u64;
207+
let total_samples = bytes_to_read / bytes_per_sample;
208+
if result.is_empty() {
209+
result = Vec::with_capacity(total_samples as usize);
210+
}
211+
212+
match h.sample_format {
213+
// float
214+
SampleFormat::Float => match h.bits_per_sample {
215+
32 => {
216+
for _ in 0..total_samples {
217+
let lv = reader.read_f32().unwrap_or(0.0);
218+
let sample = (lv.clamp(-1.0, 1.0) * i16::MAX as f32) as i16;
219+
result.push(sample);
220+
}
221+
}
222+
64 => {
223+
for _ in 0..total_samples {
224+
let lv = reader.read_f64().unwrap_or(0.0);
225+
let sample = ((lv as f32).clamp(-1.0, 1.0) * i16::MAX as f32) as i16;
226+
result.push(sample);
227+
}
228+
}
229+
_ => {
230+
return Err(DecodeError::UnsupportedWav {
231+
attribute: "bits per float sample",
232+
expected: &[32, 64],
233+
found: h.bits_per_sample as u32,
234+
})
235+
}
236+
},
237+
// int
238+
SampleFormat::Int => match h.bits_per_sample {
239+
8 => {
240+
for _ in 0..total_samples {
241+
let lv = reader.read_u8().unwrap_or(0);
242+
let normalized = (lv as f32) / (i8::MAX as f32);
243+
let sample = (normalized * i16::MAX as f32) as i16;
244+
result.push(sample);
245+
}
246+
}
247+
16 => {
248+
for _ in 0..total_samples {
249+
let lv = reader.read_i16().unwrap_or(0);
250+
result.push(lv);
251+
}
252+
}
253+
24 => {
254+
for _ in 0..total_samples {
255+
let lv = reader.read_i24().unwrap_or(0);
256+
let normalized = lv as f32 / 0xFFFFFF as f32;
257+
let sample = (normalized * i16::MAX as f32) as i16;
258+
result.push(sample);
259+
}
260+
}
261+
32 => {
262+
for _ in 0..total_samples {
263+
let lv = reader.read_i32().unwrap_or(0);
264+
let normalized = lv as f32 / i32::MAX as f32;
265+
let sample = (normalized * i16::MAX as f32) as i16;
266+
result.push(sample);
267+
}
268+
}
269+
_ => {
270+
return Err(DecodeError::UnsupportedWav {
271+
attribute: "bits per integer sample",
272+
expected: &[8, 16, 24, 32],
273+
found: h.bits_per_sample as u32,
274+
})
275+
}
276+
},
277+
_ => return Err(DecodeError::UnsupportedEncoding),
278+
}
279+
}
280+
Ok(result)
281+
}

0 commit comments

Comments
 (0)