Skip to content

Commit 419aee3

Browse files
committed
Add an audio function to retrieve the audio data captured since the last retrieval.
Without it, `stream --save-audio` produces a somewhat choppy WAV: `stream` calculates t_diff in milliseconds and combines audio pieces that are each about step_ms long. One millisecond is only WHISPER_SAMPLE_RATE / 1000 == 16 samples, but surprisingly human ears seem to be able to hear the resulting gap as noise.
1 parent e4e0598 commit 419aee3

File tree

3 files changed

+30
-14
lines changed

3 files changed

+30
-14
lines changed

examples/common-sdl.cpp

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ bool audio_async::clear() {
130130

131131
m_audio_pos = 0;
132132
m_audio_len = 0;
133+
m_audio_nxt = 0;
133134
}
134135

135136
return true;
@@ -172,6 +173,28 @@ void audio_async::callback(uint8_t * stream, int len) {
172173
}
173174

174175
void audio_async::get(int ms, std::vector<float> & result) {
176+
if (ms <= 0) {
177+
ms = m_len_ms;
178+
}
179+
180+
size_t n_samples = std::min<size_t>(m_audio_len, (m_sample_rate * ms) / 1000);
181+
182+
get_n(n_samples, result);
183+
}
184+
185+
void audio_async::next(std::vector<float> & result) {
186+
size_t n_samples;
187+
188+
if (m_audio_pos >= m_audio_nxt) {
189+
n_samples = m_audio_pos - m_audio_nxt;
190+
} else {
191+
n_samples = m_audio_len - m_audio_nxt + m_audio_pos;
192+
}
193+
194+
get_n(n_samples, result);
195+
}
196+
197+
void audio_async::get_n(size_t n_samples, std::vector<float> & result) {
175198
if (!m_dev_id_in) {
176199
fprintf(stderr, "%s: no audio device to get audio from!\n", __func__);
177200
return;
@@ -182,20 +205,9 @@ void audio_async::get(int ms, std::vector<float> & result) {
182205
return;
183206
}
184207

185-
result.clear();
186-
187208
{
188209
std::lock_guard<std::mutex> lock(m_mutex);
189210

190-
if (ms <= 0) {
191-
ms = m_len_ms;
192-
}
193-
194-
size_t n_samples = (m_sample_rate * ms) / 1000;
195-
if (n_samples > m_audio_len) {
196-
n_samples = m_audio_len;
197-
}
198-
199211
result.resize(n_samples);
200212

201213
int s0 = m_audio_pos - n_samples;
@@ -205,10 +217,12 @@ void audio_async::get(int ms, std::vector<float> & result) {
205217

206218
if (s0 + n_samples > m_audio.size()) {
207219
const size_t n0 = m_audio.size() - s0;
220+
m_audio_nxt = n_samples - n0;
208221

209222
memcpy(result.data(), &m_audio[s0], n0 * sizeof(float));
210-
memcpy(&result[n0], &m_audio[0], (n_samples - n0) * sizeof(float));
223+
memcpy(&result[n0], &m_audio[0], m_audio_nxt * sizeof(float));
211224
} else {
225+
m_audio_nxt = s0 + n_samples;
212226
memcpy(result.data(), &m_audio[s0], n_samples * sizeof(float));
213227
}
214228
}

examples/common-sdl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ class audio_async {
3030

3131
// get audio data from the circular buffer
3232
void get(int ms, std::vector<float> & audio);
33+
void next(std::vector<float> & audio);
34+
void get_n(size_t n_samples, std::vector<float> & audio);
3335

3436
private:
3537
SDL_AudioDeviceID m_dev_id_in = 0;
@@ -43,6 +45,7 @@ class audio_async {
4345
std::vector<float> m_audio;
4446
size_t m_audio_pos = 0;
4547
size_t m_audio_len = 0;
48+
size_t m_audio_nxt = 0;
4649
};
4750

4851
// Return false if need to quit

examples/stream/stream.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,7 @@ int main(int argc, char ** argv) {
241241

242242
if (!use_vad) {
243243
while (true) {
244-
audio.get(params.step_ms, pcmf32_new);
244+
audio.next(pcmf32_new);
245245

246246
if ((int) pcmf32_new.size() > 2*n_samples_step) {
247247
fprintf(stderr, "\n\n%s: WARNING: cannot process audio fast enough, dropping audio ...\n\n", __func__);
@@ -250,7 +250,6 @@ int main(int argc, char ** argv) {
250250
}
251251

252252
if ((int) pcmf32_new.size() >= n_samples_step) {
253-
audio.clear();
254253
break;
255254
}
256255

0 commit comments

Comments
 (0)