|
| 1 | +/** |
| 2 | + * @file benchmark_audio_conversion.cpp |
| 3 | + * @brief Performance benchmarks for audio conversion and I/O |
| 4 | + */ |
| 5 | + |
| 6 | +#ifdef ENABLE_WHISPER |
| 7 | + |
#include "utils/audio_converter.h"
#include "utils/signal_generator.h"
#include "media/wav_writer.h"

#include <benchmark/benchmark.h>

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>
| 15 | + |
| 16 | +using namespace ffvoice; |
| 17 | + |
| 18 | +// ============================================================================= |
| 19 | +// Audio Conversion Benchmarks |
| 20 | +// ============================================================================= |
| 21 | + |
| 22 | +static void BM_AudioConverter_Int16ToFloat(benchmark::State& state) { |
| 23 | + const size_t num_samples = state.range(0); |
| 24 | + SignalGenerator generator; |
| 25 | + std::vector<int16_t> int_samples = generator.GenerateSineWave(440.0, |
| 26 | + static_cast<double>(num_samples) / 16000, 16000, 0.5); |
| 27 | + std::vector<float> float_samples(num_samples); |
| 28 | + |
| 29 | + for (auto _ : state) { |
| 30 | + AudioConverter::Int16ToFloat(int_samples.data(), num_samples, float_samples.data()); |
| 31 | + benchmark::DoNotOptimize(float_samples.data()); |
| 32 | + } |
| 33 | + |
| 34 | + state.SetItemsProcessed(state.iterations() * num_samples); |
| 35 | + state.SetBytesProcessed(state.iterations() * num_samples * sizeof(int16_t)); |
| 36 | +} |
| 37 | + |
| 38 | +BENCHMARK(BM_AudioConverter_Int16ToFloat) |
| 39 | + ->Arg(480) |
| 40 | + ->Arg(1024) |
| 41 | + ->Arg(4096) |
| 42 | + ->Arg(16000) // 1 second @ 16kHz |
| 43 | + ->Arg(48000) // 1 second @ 48kHz |
| 44 | + ->Unit(benchmark::kMicrosecond); |
| 45 | + |
| 46 | +static void BM_AudioConverter_StereoToMono(benchmark::State& state) { |
| 47 | + const size_t num_frames = state.range(0); |
| 48 | + const size_t num_samples = num_frames * 2; // Stereo |
| 49 | + std::vector<float> stereo_samples(num_samples); |
| 50 | + std::vector<float> mono_samples(num_frames); |
| 51 | + |
| 52 | + // Fill with test data |
| 53 | + for (size_t i = 0; i < num_samples; i += 2) { |
| 54 | + stereo_samples[i] = static_cast<float>(i) / num_samples; // Left |
| 55 | + stereo_samples[i + 1] = static_cast<float>(i + 1) / num_samples; // Right |
| 56 | + } |
| 57 | + |
| 58 | + for (auto _ : state) { |
| 59 | + AudioConverter::StereoToMono(stereo_samples.data(), num_frames, mono_samples.data()); |
| 60 | + benchmark::DoNotOptimize(mono_samples.data()); |
| 61 | + } |
| 62 | + |
| 63 | + state.SetItemsProcessed(state.iterations() * num_frames); |
| 64 | + state.SetBytesProcessed(state.iterations() * num_samples * sizeof(float)); |
| 65 | +} |
| 66 | + |
| 67 | +BENCHMARK(BM_AudioConverter_StereoToMono) |
| 68 | + ->Arg(480) |
| 69 | + ->Arg(1024) |
| 70 | + ->Arg(4096) |
| 71 | + ->Arg(16000) |
| 72 | + ->Arg(48000) |
| 73 | + ->Unit(benchmark::kMicrosecond); |
| 74 | + |
| 75 | +static void BM_AudioConverter_Resample(benchmark::State& state) { |
| 76 | + const size_t input_size = state.range(0); |
| 77 | + const int in_sample_rate = 48000; |
| 78 | + const int out_sample_rate = 16000; |
| 79 | + const size_t output_size = (input_size * out_sample_rate) / in_sample_rate; |
| 80 | + |
| 81 | + std::vector<float> input_samples(input_size); |
| 82 | + std::vector<float> output_samples(output_size); |
| 83 | + |
| 84 | + for (size_t i = 0; i < input_size; ++i) { |
| 85 | + input_samples[i] = std::sin(2.0 * M_PI * 440.0 * i / in_sample_rate); |
| 86 | + } |
| 87 | + |
| 88 | + for (auto _ : state) { |
| 89 | + AudioConverter::Resample(input_samples.data(), input_size, in_sample_rate, |
| 90 | + output_samples.data(), output_size, out_sample_rate); |
| 91 | + benchmark::DoNotOptimize(output_samples.data()); |
| 92 | + } |
| 93 | + |
| 94 | + state.SetItemsProcessed(state.iterations() * input_size); |
| 95 | + state.SetBytesProcessed(state.iterations() * input_size * sizeof(float)); |
| 96 | +} |
| 97 | + |
| 98 | +BENCHMARK(BM_AudioConverter_Resample) |
| 99 | + ->Arg(480) |
| 100 | + ->Arg(1024) |
| 101 | + ->Arg(4096) |
| 102 | + ->Arg(48000) // 1 second @ 48kHz |
| 103 | + ->Unit(benchmark::kMicrosecond); |
| 104 | + |
| 105 | +// ============================================================================= |
| 106 | +// WAV Writer Benchmarks |
| 107 | +// ============================================================================= |
| 108 | + |
| 109 | +static void BM_WavWriter_WriteSamples(benchmark::State& state) { |
| 110 | + const int sample_rate = 48000; |
| 111 | + const int channels = 1; |
| 112 | + const size_t num_samples = state.range(0); |
| 113 | + const std::string test_file = "/tmp/benchmark_wav.wav"; |
| 114 | + |
| 115 | + SignalGenerator generator; |
| 116 | + std::vector<int16_t> samples = generator.GenerateSineWave(440.0, |
| 117 | + static_cast<double>(num_samples) / sample_rate, sample_rate, 0.5); |
| 118 | + |
| 119 | + for (auto _ : state) { |
| 120 | + state.PauseTiming(); |
| 121 | + WavWriter writer; |
| 122 | + writer.Open(test_file, sample_rate, channels, 16); |
| 123 | + state.ResumeTiming(); |
| 124 | + |
| 125 | + writer.WriteSamples(samples); |
| 126 | + |
| 127 | + state.PauseTiming(); |
| 128 | + writer.Close(); |
| 129 | + std::remove(test_file.c_str()); |
| 130 | + state.ResumeTiming(); |
| 131 | + } |
| 132 | + |
| 133 | + state.SetItemsProcessed(state.iterations() * num_samples); |
| 134 | + state.SetBytesProcessed(state.iterations() * num_samples * sizeof(int16_t)); |
| 135 | +} |
| 136 | + |
| 137 | +BENCHMARK(BM_WavWriter_WriteSamples) |
| 138 | + ->Arg(480) |
| 139 | + ->Arg(1024) |
| 140 | + ->Arg(4096) |
| 141 | + ->Arg(48000) |
| 142 | + ->Unit(benchmark::kMicrosecond); |
| 143 | + |
| 144 | +// ============================================================================= |
| 145 | +// Combined Conversion Pipeline Benchmarks |
| 146 | +// ============================================================================= |
| 147 | + |
| 148 | +static void BM_FullConversionPipeline(benchmark::State& state) { |
| 149 | + const size_t num_frames = state.range(0); |
| 150 | + const int in_sample_rate = 48000; |
| 151 | + const int out_sample_rate = 16000; |
| 152 | + |
| 153 | + // Generate stereo int16 samples |
| 154 | + std::vector<int16_t> stereo_int16(num_frames * 2); |
| 155 | + std::vector<float> float_samples(num_frames * 2); |
| 156 | + std::vector<float> mono_samples(num_frames); |
| 157 | + const size_t resampled_size = (num_frames * out_sample_rate) / in_sample_rate; |
| 158 | + std::vector<float> resampled(resampled_size); |
| 159 | + |
| 160 | + for (size_t i = 0; i < stereo_int16.size(); ++i) { |
| 161 | + stereo_int16[i] = static_cast<int16_t>( |
| 162 | + 32767.0 * std::sin(2.0 * M_PI * 440.0 * (i / 2) / in_sample_rate)); |
| 163 | + } |
| 164 | + |
| 165 | + for (auto _ : state) { |
| 166 | + // Step 1: int16 → float |
| 167 | + AudioConverter::Int16ToFloat(stereo_int16.data(), stereo_int16.size(), float_samples.data()); |
| 168 | + |
| 169 | + // Step 2: stereo → mono |
| 170 | + AudioConverter::StereoToMono(float_samples.data(), num_frames, mono_samples.data()); |
| 171 | + |
| 172 | + // Step 3: resample 48kHz → 16kHz |
| 173 | + AudioConverter::Resample(mono_samples.data(), num_frames, in_sample_rate, |
| 174 | + resampled.data(), resampled_size, out_sample_rate); |
| 175 | + |
| 176 | + benchmark::DoNotOptimize(resampled.data()); |
| 177 | + } |
| 178 | + |
| 179 | + state.SetItemsProcessed(state.iterations() * num_frames); |
| 180 | +} |
| 181 | + |
| 182 | +BENCHMARK(BM_FullConversionPipeline) |
| 183 | + ->Arg(480) |
| 184 | + ->Arg(1024) |
| 185 | + ->Arg(4096) |
| 186 | + ->Arg(48000) |
| 187 | + ->Unit(benchmark::kMicrosecond); |
| 188 | + |
| 189 | +#endif // ENABLE_WHISPER |
0 commit comments