Skip to content

Commit cf38b47

Browse files
committed
fix windows
1 parent 6f23ad1 commit cf38b47

File tree

1 file changed

+10
-97
lines changed

1 file changed

+10
-97
lines changed

tools/mtmd/mtmd-audio.cpp

Lines changed: 10 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
// fix problem with std::min and std::max
2+
#if defined(_WIN32)
3+
#define WIN32_LEAN_AND_MEAN
4+
#ifndef NOMINMAX
5+
# define NOMINMAX
6+
#endif
7+
#include <windows.h>
8+
#endif
9+
110
#include "mtmd-audio.h"
211

312
#define MINIAUDIO_IMPLEMENTATION
@@ -10,15 +19,6 @@
1019
#define MA_API static
1120
#include "miniaudio.h"
1221

13-
// fix problem with std::min and std::max
14-
#if defined(_WIN32)
15-
#define WIN32_LEAN_AND_MEAN
16-
#ifndef NOMINMAX
17-
# define NOMINMAX
18-
#endif
19-
#include <windows.h>
20-
#endif
21-
2222
#define _USE_MATH_DEFINES // for M_PI
2323
#include <cmath>
2424
#include <cstdint>
@@ -316,86 +316,6 @@ bool preprocess_audio(
316316

317317
namespace wav_utils {
318318

319-
// Sinc function: sin(pi*x) / (pi*x)
320-
static double calculate_sinc(double x) {
321-
if (x == 0.0) return 1.0;
322-
double pi_x = M_PI * x;
323-
return std::sin(pi_x) / pi_x;
324-
}
325-
326-
// Hann window function
327-
static double calculate_hann_window(double x, double half_width) {
328-
if (half_width == 0.0) return 1.0;
329-
if (std::abs(x) >= half_width) return 0.0;
330-
return 0.5 * (1.0 + std::cos(M_PI * x / half_width));
331-
}
332-
333-
/**
334-
* @brief Resamples audio data using windowed sinc interpolation.
335-
* @param kernel_half_width_input_samples Number of input samples on each side of the
336-
* interpolation point for the sinc kernel. Larger values improve quality but cost performance.
337-
*/
338-
static std::vector<float> resample_sinc(const std::vector<float>& samples,
339-
int new_rate,
340-
int old_rate,
341-
int kernel_half_width_input_samples = 16) {
342-
if (old_rate <= 0 || new_rate <= 0) {
343-
throw std::invalid_argument("Sample rates must be positive.");
344-
}
345-
if (samples.empty()) return {};
346-
if (new_rate == old_rate) return samples;
347-
if (kernel_half_width_input_samples <= 0) {
348-
throw std::invalid_argument("Kernel half width must be positive.");
349-
}
350-
351-
double ratio = static_cast<double>(new_rate) / old_rate;
352-
size_t new_num_samples = static_cast<size_t>(std::round(static_cast<double>(samples.size()) * ratio));
353-
354-
if (new_num_samples == 0) return {};
355-
356-
std::vector<float> resampled_samples(new_num_samples);
357-
358-
// Sinc argument scaling for anti-aliasing/anti-imaging:
359-
// adjusts filter cutoff to the lower of the two Nyquist frequencies.
360-
double sinc_argument_scale_factor = std::min(1.0, ratio);
361-
362-
for (size_t i = 0; i < new_num_samples; ++i) {
363-
double t_new_sample_time = static_cast<double>(i) / new_rate;
364-
double center_input_idx_float = t_new_sample_time * old_rate; // Fractional index in original samples
365-
366-
double current_output_value = 0.0;
367-
double current_kernel_sum = 0.0; // For normalizing filter gain
368-
369-
int first_input_idx_to_consider = static_cast<int>(std::floor(center_input_idx_float)) - kernel_half_width_input_samples + 1;
370-
int last_input_idx_to_consider = static_cast<int>(std::floor(center_input_idx_float)) + kernel_half_width_input_samples;
371-
372-
for (int k = first_input_idx_to_consider; k <= last_input_idx_to_consider; ++k) {
373-
if (k < 0 || k >= static_cast<int>(samples.size())) {
374-
continue; // Effectively zero-padding
375-
}
376-
377-
// Distance (in original sample intervals) from original sample 'k' to new sample's ideal position
378-
double time_diff_in_old_samples = center_input_idx_float - static_cast<double>(k);
379-
double sinc_kernel_arg = time_diff_in_old_samples * sinc_argument_scale_factor;
380-
381-
double sinc_value = calculate_sinc(sinc_kernel_arg);
382-
double window_value = calculate_hann_window(time_diff_in_old_samples, static_cast<double>(kernel_half_width_input_samples));
383-
384-
double tap_weight = sinc_value * window_value;
385-
386-
current_output_value += samples[k] * tap_weight;
387-
current_kernel_sum += tap_weight;
388-
}
389-
390-
if (current_kernel_sum != 0.0) {
391-
resampled_samples[i] = static_cast<float>(current_output_value / current_kernel_sum);
392-
} else {
393-
resampled_samples[i] = 0.0f; // If kernel sum is zero (e.g., outside original signal range)
394-
}
395-
}
396-
return resampled_samples;
397-
}
398-
399319
bool is_wav_buffer(const std::string buf) {
400320
// RIFF ref: https://en.wikipedia.org/wiki/Resource_Interchange_File_Format
401321
// WAV ref: https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
@@ -411,13 +331,7 @@ bool is_wav_buffer(const std::string buf) {
411331
return true;
412332
}
413333

414-
// returns mono PCM data
415-
#define MINIAUDIO_IMPLEMENTATION
416-
#include "miniaudio.h"
417-
418-
#include <vector>
419-
// #include <iostream> // For debugging, can be removed
420-
334+
// decodes the audio buffer into mono f32 PCM (pcmf32_mono); returns false if decoding fails (e.g. the decoder cannot be initialized)
421335
bool read_wav_from_buf(const unsigned char * buf_in, size_t len, int target_sampler_rate, std::vector<float> & pcmf32_mono) {
422336
ma_result result;
423337
// Request f32 output from the decoder. Channel count and sample rate are determined from the file.
@@ -426,7 +340,6 @@ bool read_wav_from_buf(const unsigned char * buf_in, size_t len, int target_samp
426340

427341
result = ma_decoder_init_memory(buf_in, len, &decoder_config, &decoder);
428342
if (result != MA_SUCCESS) {
429-
fprintf(stderr, "Unable to initialize decoder\n");
430343
return false;
431344
}
432345

0 commit comments

Comments
 (0)