1+ // fix problem with std::min and std::max
2+ #if defined(_WIN32)
3+ #define WIN32_LEAN_AND_MEAN
4+ #ifndef NOMINMAX
5+ # define NOMINMAX
6+ #endif
7+ #include < windows.h>
8+ #endif
9+
110#include " mtmd-audio.h"
211
312#define MINIAUDIO_IMPLEMENTATION
1019#define MA_API static
1120#include " miniaudio.h"
1221
13- // fix problem with std::min and std::max
14- #if defined(_WIN32)
15- #define WIN32_LEAN_AND_MEAN
16- #ifndef NOMINMAX
17- # define NOMINMAX
18- #endif
19- #include < windows.h>
20- #endif
21-
2222#define _USE_MATH_DEFINES // for M_PI
2323#include < cmath>
2424#include < cstdint>
@@ -316,86 +316,6 @@ bool preprocess_audio(
316316
317317namespace wav_utils {
318318
// Pi as a local constant. M_PI is not part of standard C++; some toolchains only
// provide it when _USE_MATH_DEFINES is defined before the first math header is included.
static constexpr double k_resample_pi = 3.14159265358979323846;

/**
 * @brief Normalized sinc function: sin(pi*x) / (pi*x).
 * @param x Argument of the sinc function.
 * @return 1.0 for x == 0, sin(pi*x) / (pi*x) otherwise.
 */
static double calculate_sinc(double x) {
    // Exact comparison is intended: only x == 0 would evaluate 0/0 below.
    if (x == 0.0) {
        return 1.0;
    }
    const double pi_x = k_resample_pi * x;
    return std::sin(pi_x) / pi_x;
}

/**
 * @brief Hann window centered at 0 with support (-half_width, half_width).
 * @param x          Distance from the window center.
 * @param half_width Half of the window support, in the same unit as x.
 * @return 1.0 when half_width == 0, 0.0 when |x| >= half_width,
 *         otherwise 0.5 * (1 + cos(pi * x / half_width)).
 */
static double calculate_hann_window(double x, double half_width) {
    if (half_width == 0.0) {
        return 1.0;
    }
    if (std::abs(x) >= half_width) {
        return 0.0;
    }
    return 0.5 * (1.0 + std::cos(k_resample_pi * x / half_width));
}

/**
 * @brief Resamples audio data using windowed sinc interpolation.
 *
 * Each output sample is a weighted sum of the input samples around its fractional
 * position in the input. The weights are sinc * Hann window, and the sum is divided
 * by the total weight so that the filter gain stays at 1 (also near the signal
 * edges, where part of the kernel falls outside the input and is treated as zero).
 *
 * Note: the Hann window span is fixed at kernel_half_width_input_samples input
 * samples on each side; it is not widened when downsampling.
 *
 * @param samples  Input samples at old_rate.
 * @param new_rate Target sample rate, must be > 0.
 * @param old_rate Source sample rate, must be > 0.
 * @param kernel_half_width_input_samples Number of input samples on each side of the
 *        interpolation point for the sinc kernel. Larger values improve quality but cost performance.
 * @return Resampled signal with round(samples.size() * new_rate / old_rate) samples.
 *         Empty input yields an empty result; equal rates return a copy of the input.
 * @throws std::invalid_argument if a rate is not positive, or (only when resampling
 *         is actually required) if the kernel half width is not positive.
 */
static std::vector<float> resample_sinc(const std::vector<float> & samples,
                                        int new_rate,
                                        int old_rate,
                                        int kernel_half_width_input_samples = 16) {
    if (old_rate <= 0 || new_rate <= 0) {
        throw std::invalid_argument("Sample rates must be positive.");
    }
    if (samples.empty()) {
        return {};
    }
    if (new_rate == old_rate) {
        return samples;
    }
    if (kernel_half_width_input_samples <= 0) {
        throw std::invalid_argument("Kernel half width must be positive.");
    }

    const double ratio = static_cast<double>(new_rate) / old_rate;
    const size_t new_num_samples =
        static_cast<size_t>(std::round(static_cast<double>(samples.size()) * ratio));
    if (new_num_samples == 0) {
        return {};
    }

    std::vector<float> resampled_samples(new_num_samples);

    // Sinc argument scaling for anti-aliasing/anti-imaging:
    // adjusts filter cutoff to the lower of the two Nyquist frequencies.
    const double sinc_scale = std::min(1.0, ratio);

    // 64-bit signed indices: an int would overflow on very long inputs.
    const int64_t num_input  = static_cast<int64_t>(samples.size());
    const int64_t half_width = kernel_half_width_input_samples;
    const double  window_half_width = static_cast<double>(kernel_half_width_input_samples);

    for (size_t i = 0; i < new_num_samples; ++i) {
        // Fractional index of this output sample in the input signal. Multiplying
        // before dividing keeps a single rounding step, so exact integer positions
        // (e.g. every 2nd output when upsampling by 2) are hit exactly.
        const double center = static_cast<double>(i) * old_rate / new_rate;
        const int64_t base  = static_cast<int64_t>(std::floor(center));

        // Kernel taps, clamped to the valid input range. Taps outside the input
        // contribute nothing, which is equivalent to zero-padding.
        const int64_t first_tap = std::max<int64_t>(base - half_width + 1, 0);
        const int64_t last_tap  = std::min<int64_t>(base + half_width, num_input - 1);

        double weighted_sum = 0.0;
        double weight_total = 0.0; // for normalizing filter gain

        for (int64_t k = first_tap; k <= last_tap; ++k) {
            // Distance (in input sample intervals) from input sample k to the output position.
            const double offset = center - static_cast<double>(k);
            const double weight = calculate_sinc(offset * sinc_scale) *
                                  calculate_hann_window(offset, window_half_width);

            weighted_sum += samples[static_cast<size_t>(k)] * weight;
            weight_total += weight;
        }

        // A zero total weight means no usable tap (e.g. outside the original signal range).
        resampled_samples[i] = weight_total != 0.0
            ? static_cast<float>(weighted_sum / weight_total)
            : 0.0f;
    }
    return resampled_samples;
}
398-
399319bool is_wav_buffer (const std::string buf) {
400320 // RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
401321 // WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
@@ -411,13 +331,7 @@ bool is_wav_buffer(const std::string buf) {
411331 return true ;
412332}
413333
414- // returns mono PCM data
415- #define MINIAUDIO_IMPLEMENTATION
416- #include " miniaudio.h"
417-
418- #include < vector>
419- // #include <iostream> // For debugging, can be removed
420-
334+ // returns true if the buffer is a valid WAV file
421335bool read_wav_from_buf (const unsigned char * buf_in, size_t len, int target_sampler_rate, std::vector<float > & pcmf32_mono) {
422336 ma_result result;
423337 // Request f32 output from the decoder. Channel count and sample rate are determined from the file.
@@ -426,7 +340,6 @@ bool read_wav_from_buf(const unsigned char * buf_in, size_t len, int target_samp
426340
427341 result = ma_decoder_init_memory (buf_in, len, &decoder_config, &decoder);
428342 if (result != MA_SUCCESS) {
429- fprintf (stderr, " Unable to initialize decoder\n " );
430343 return false ;
431344 }
432345
0 commit comments