|
1 | | -// fix problem with std::min and std::max |
2 | | -#if defined(_WIN32) |
3 | | -#define WIN32_LEAN_AND_MEAN |
4 | | -#ifndef NOMINMAX |
5 | | -# define NOMINMAX |
6 | | -#endif |
7 | | -#include <windows.h> |
8 | | -#endif |
9 | | - |
10 | 1 | #include "mtmd-audio.h" |
11 | 2 |
|
12 | | -//#define MTMD_AUDIO_DEBUG |
13 | | - |
14 | | -#define MINIAUDIO_IMPLEMENTATION |
15 | | -#ifndef MTMD_AUDIO_DEBUG |
16 | | -# define MA_NO_ENCODING |
17 | | -#endif |
18 | | -#define MA_NO_DEVICE_IO |
19 | | -#define MA_NO_RESOURCE_MANAGER |
20 | | -#define MA_NO_NODE_GRAPH |
21 | | -#define MA_NO_ENGINE |
22 | | -#define MA_NO_GENERATION |
23 | | -#define MA_API static |
24 | | -#include "miniaudio.h" |
25 | | - |
26 | 3 | #define _USE_MATH_DEFINES // for M_PI |
27 | 4 | #include <cmath> |
28 | 5 | #include <cstdint> |
@@ -359,69 +336,6 @@ bool preprocess_audio( |
359 | 336 | } // namespace whisper_preprocessor |
360 | 337 |
|
361 | 338 |
|
362 | | -namespace audio_helpers { |
363 | | - |
364 | | -bool is_audio_file(const char * buf, size_t len) { |
365 | | - if (len < 12) { |
366 | | - return false; |
367 | | - } |
368 | | - |
369 | | - // RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format |
370 | | - // WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html |
371 | | - bool is_wav = memcmp(buf, "RIFF", 4) == 0 && memcmp(buf + 8, "WAVE", 4) == 0; |
372 | | - bool is_mp3 = len >= 3 && ( |
373 | | - memcmp(buf, "ID3", 3) == 0 || |
374 | | - // Check for MPEG sync word (simplified check) |
375 | | - ((unsigned char)buf[0] == 0xFF && ((unsigned char)buf[1] & 0xE0) == 0xE0) |
376 | | - ); |
377 | | - bool is_flac = memcmp(buf, "fLaC", 4) == 0; |
378 | | - |
379 | | - return is_wav || is_mp3 || is_flac; |
380 | | -} |
381 | | - |
382 | | -// decodes the audio buffer into mono float32 PCM at target_sampler_rate; returns true on success, false if the buffer cannot be decoded |
383 | | -bool decode_audio_from_buf(const unsigned char * buf_in, size_t len, int target_sampler_rate, std::vector<float> & pcmf32_mono) { |
384 | | - ma_result result; |
385 | | - const int channels = 1; |
386 | | - ma_decoder_config decoder_config = ma_decoder_config_init(ma_format_f32, channels, target_sampler_rate); |
387 | | - ma_decoder decoder; |
388 | | - |
389 | | - result = ma_decoder_init_memory(buf_in, len, &decoder_config, &decoder); |
390 | | - if (result != MA_SUCCESS) { |
391 | | - return false; |
392 | | - } |
393 | | - |
394 | | - ma_uint64 frame_count; |
395 | | - ma_uint64 frames_read; |
396 | | - result = ma_decoder_get_length_in_pcm_frames(&decoder, &frame_count); |
397 | | - if (result != MA_SUCCESS) { |
398 | | - ma_decoder_uninit(&decoder); |
399 | | - return false; |
400 | | - } |
401 | | - |
402 | | - pcmf32_mono.resize(frame_count); |
403 | | - result = ma_decoder_read_pcm_frames(&decoder, pcmf32_mono.data(), frame_count, &frames_read); |
404 | | - if (result != MA_SUCCESS) { |
405 | | - ma_decoder_uninit(&decoder); |
406 | | - return false; |
407 | | - } |
408 | | - |
409 | | -#ifdef MTMD_AUDIO_DEBUG |
410 | | - // save audio to wav file |
411 | | - ma_encoder_config config = ma_encoder_config_init(ma_encoding_format_wav, ma_format_f32, 1, target_sampler_rate); |
412 | | - ma_encoder encoder; |
413 | | - ma_encoder_init_file("output.wav", &config, &encoder); |
414 | | - ma_encoder_write_pcm_frames(&encoder, pcmf32_mono.data(), pcmf32_mono.size(), &frames_read); |
415 | | - ma_encoder_uninit(&encoder); |
416 | | -#endif |
417 | | - |
418 | | - ma_decoder_uninit(&decoder); |
419 | | - return true; |
420 | | -} |
421 | | - |
422 | | -} // namespace audio_helpers |
423 | | - |
424 | | - |
425 | 339 | // precalculated mel filter banks |
426 | 340 | // values are multiplied by 1000.0 to save space, and will be divided by 1000.0 at the end of the function |
427 | 341 | // |
|
0 commit comments