44
55// third-party utilities
66// use your favorite implementations
7- #define DR_WAV_IMPLEMENTATION
8- #include " dr_wav.h"
7+ #define STB_VORBIS_HEADER_ONLY
8+ #include " stb_vorbis.c" /* Enables Vorbis decoding. */
9+
10+ #define MA_NO_DEVICE_IO
11+ #define MA_NO_THREADING
12+ #define MA_NO_ENCODING
13+ #define MA_NO_GENERATION
14+ #define MA_NO_RESOURCE_MANAGER
15+ #define MA_NO_NODE_GRAPH
16+ #define MINIAUDIO_IMPLEMENTATION
17+ #include " miniaudio.h"
918
1019#include < cmath>
1120#include < cstring>
@@ -639,9 +648,14 @@ bool is_wav_buffer(const std::string buf) {
639648 return true ;
640649}
641650
642- bool read_wav (const std::string & fname, std::vector<float >& pcmf32, std::vector<std::vector<float >>& pcmf32s, bool stereo) {
643- drwav wav;
644- std::vector<uint8_t > wav_data; // used for pipe input from stdin or ffmpeg decoding output
651+ bool read_audio_data (const std::string & fname, std::vector<float >& pcmf32, std::vector<std::vector<float >>& pcmf32s, bool stereo) {
652+ std::vector<uint8_t > audio_data; // used for pipe input from stdin or ffmpeg decoding output
653+
654+ ma_result result;
655+ ma_decoder_config decoder_config;
656+ ma_decoder decoder;
657+
658+ decoder_config = ma_decoder_config_init (ma_format_f32, stereo ? 2 : 1 , COMMON_SAMPLE_RATE);
645659
646660 if (fname == " -" ) {
647661 {
@@ -656,93 +670,55 @@ bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector
656670 if (n == 0 ) {
657671 break ;
658672 }
659- wav_data .insert (wav_data .end (), buf, buf + n);
673+ audio_data .insert (audio_data .end (), buf, buf + n);
660674 }
661675 }
662676
663- if (drwav_init_memory (&wav, wav_data .data (), wav_data .size (), nullptr ) == false ) {
664- fprintf (stderr, " error : failed to open WAV file from stdin\n " );
677+ if (ma_decoder_init_memory (audio_data .data (), audio_data .size (), &decoder_config, &decoder) != MA_SUCCESS ) {
678+ fprintf (stderr, " Error : failed to open audio data from stdin\n " );
665679 return false ;
666- }
680+ }
667681
668- fprintf (stderr, " %s: read %zu bytes from stdin\n " , __func__, wav_data .size ());
682+ fprintf (stderr, " %s: read %zu bytes from stdin\n " , __func__, audio_data .size ());
669683 }
670684 else if (is_wav_buffer (fname)) {
671- if (drwav_init_memory (&wav, fname. c_str (), fname .size (), nullptr ) == false ) {
672- fprintf (stderr, " error : failed to open WAV file from fname buffer\n " );
685+ if (ma_decoder_init_memory (audio_data. data (), audio_data .size (), &decoder_config, &decoder) != MA_SUCCESS ) {
686+ fprintf (stderr, " Error : failed to open audio data from fname buffer\n " );
673687 return false ;
674- }
688+ }
675689 }
676- else if (drwav_init_file (&wav, fname.c_str (), nullptr ) == false ) {
690+ else if (ma_decoder_init_file ( fname.c_str (), &decoder_config, &decoder) != MA_SUCCESS ) {
677691#if defined(WHISPER_FFMPEG)
678- if (ffmpeg_decode_audio (fname, wav_data ) != 0 ) {
692+ if (ffmpeg_decode_audio (fname, audio_data ) != 0 ) {
679693 fprintf (stderr, " error: failed to ffmpeg decode '%s' \n " , fname.c_str ());
680694 return false ;
681695 }
682- if (drwav_init_memory (&wav, wav_data .data (), wav_data .size (), nullptr ) == false ) {
696+ if (ma_decoder_init_memory (audio_data .data (), audio_data .size (), &decoder_config, &decoder) != MA_SUCCESS ) {
683697 fprintf (stderr, " error: failed to read wav data as wav \n " );
684698 return false ;
685699 }
686700#else
687- fprintf (stderr, " error: failed to open '%s' as WAV file\n " , fname.c_str ());
701+ fprintf (stderr, " error: failed to open '%s' file\n " , fname.c_str ());
688702 return false ;
689703#endif
690704 }
691705
692- if (wav.channels != 1 && wav.channels != 2 ) {
693- fprintf (stderr, " %s: WAV file '%s' must be mono or stereo\n " , __func__, fname.c_str ());
694- drwav_uninit (&wav);
695- return false ;
696- }
697-
698- if (stereo && wav.channels != 2 ) {
699- fprintf (stderr, " %s: WAV file '%s' must be stereo for diarization\n " , __func__, fname.c_str ());
700- drwav_uninit (&wav);
701- return false ;
702- }
703-
704- if (wav.sampleRate != COMMON_SAMPLE_RATE) {
705- fprintf (stderr, " %s: WAV file '%s' must be %i kHz\n " , __func__, fname.c_str (), COMMON_SAMPLE_RATE/1000 );
706- drwav_uninit (&wav);
707- return false ;
708- }
709-
710- if (wav.bitsPerSample != 16 ) {
711- fprintf (stderr, " %s: WAV file '%s' must be 16-bit\n " , __func__, fname.c_str ());
712- drwav_uninit (&wav);
713- return false ;
714- }
715-
716- const uint64_t n = wav_data.empty () ? wav.totalPCMFrameCount : wav_data.size ()/(wav.channels *wav.bitsPerSample /8 );
706+ ma_uint64 frame_count;
707+ ma_uint64 frames_read;
717708
718- std::vector<int16_t > pcm16;
719- pcm16.resize (n*wav.channels );
720- drwav_read_pcm_frames_s16 (&wav, n, pcm16.data ());
721- drwav_uninit (&wav);
722-
723- // convert to mono, float
724- pcmf32.resize (n);
725- if (wav.channels == 1 ) {
726- for (uint64_t i = 0 ; i < n; i++) {
727- pcmf32[i] = float (pcm16[i])/32768 .0f ;
728- }
729- } else {
730- for (uint64_t i = 0 ; i < n; i++) {
731- pcmf32[i] = float (pcm16[2 *i] + pcm16[2 *i + 1 ])/65536 .0f ;
732- }
733- }
709+ ma_decoder_get_length_in_pcm_frames (&decoder, &frame_count);
710+ pcmf32.resize (stereo ? frame_count*2 : frame_count);
711+ ma_decoder_read_pcm_frames (&decoder, pcmf32.data (), frame_count, &frames_read);
734712
735713 if (stereo) {
736- // convert to stereo, float
737- pcmf32s.resize (2 );
738-
739- pcmf32s[0 ].resize (n);
740- pcmf32s[1 ].resize (n);
741- for (uint64_t i = 0 ; i < n; i++) {
742- pcmf32s[0 ][i] = float (pcm16[2 *i])/32768 .0f ;
743- pcmf32s[1 ][i] = float (pcm16[2 *i + 1 ])/32768 .0f ;
714+ pcmf32s[0 ].resize (frame_count);
715+ pcmf32s[1 ].resize (frame_count);
716+ for (uint64_t i = 0 ; i < frame_count; i++) {
717+ pcmf32s[0 ][i] = pcmf32[2 *i];
718+ pcmf32s[1 ][i] = pcmf32[2 *i + 1 ];
744719 }
745720 }
721+ ma_decoder_uninit (&decoder);
746722
747723 return true ;
748724}
@@ -909,3 +885,6 @@ bool speak_with_file(const std::string & command, const std::string & text, cons
909885 }
910886 return true ;
911887}
888+
889+ #undef STB_VORBIS_HEADER_ONLY
890+ #include " stb_vorbis.c"
0 commit comments