|
1 | 1 | #include <ruby.h> |
2 | 2 | #include "ruby_whisper.h" |
3 | | -#define DR_WAV_IMPLEMENTATION |
4 | | -#include "dr_wav.h" |
| 3 | +#include "common-whisper.h" |
5 | 4 | #include <string> |
6 | 5 | #include <vector> |
7 | 6 |
|
@@ -47,84 +46,9 @@ ruby_whisper_transcribe(int argc, VALUE *argv, VALUE self) { |
47 | 46 | std::vector<float> pcmf32; // mono-channel F32 PCM |
48 | 47 | std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM |
49 | 48 |
|
50 | | - // WAV input - this is directly from main.cpp example |
51 | | - { |
52 | | - drwav wav; |
53 | | - std::vector<uint8_t> wav_data; // used for pipe input from stdin |
54 | | - |
55 | | - if (fname_inp == "-") { |
56 | | - { |
57 | | - uint8_t buf[1024]; |
58 | | - while (true) { |
59 | | - const size_t n = fread(buf, 1, sizeof(buf), stdin); |
60 | | - if (n == 0) { |
61 | | - break; |
62 | | - } |
63 | | - wav_data.insert(wav_data.end(), buf, buf + n); |
64 | | - } |
65 | | - } |
66 | | - |
67 | | - if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) { |
68 | | - fprintf(stderr, "error: failed to open WAV file from stdin\n"); |
69 | | - return self; |
70 | | - } |
71 | | - |
72 | | - fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size()); |
73 | | - } else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) { |
74 | | - fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str()); |
75 | | - return self; |
76 | | - } |
77 | | - |
78 | | - if (wav.channels != 1 && wav.channels != 2) { |
79 | | - fprintf(stderr, "WAV file '%s' must be mono or stereo\n", fname_inp.c_str()); |
80 | | - return self; |
81 | | - } |
82 | | - |
83 | | - if (rwp->diarize && wav.channels != 2 && rwp->params.print_timestamps == false) { |
84 | | - fprintf(stderr, "WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str()); |
85 | | - return self; |
86 | | - } |
87 | | - |
88 | | - if (wav.sampleRate != WHISPER_SAMPLE_RATE) { |
89 | | - fprintf(stderr, "WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000); |
90 | | - return self; |
91 | | - } |
92 | | - |
93 | | - if (wav.bitsPerSample != 16) { |
94 | | - fprintf(stderr, "WAV file '%s' must be 16-bit\n", fname_inp.c_str()); |
95 | | - return self; |
96 | | - } |
97 | | - |
98 | | - const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8); |
99 | | - |
100 | | - std::vector<int16_t> pcm16; |
101 | | - pcm16.resize(n*wav.channels); |
102 | | - drwav_read_pcm_frames_s16(&wav, n, pcm16.data()); |
103 | | - drwav_uninit(&wav); |
104 | | - |
105 | | - // convert to mono, float |
106 | | - pcmf32.resize(n); |
107 | | - if (wav.channels == 1) { |
108 | | - for (uint64_t i = 0; i < n; i++) { |
109 | | - pcmf32[i] = float(pcm16[i])/32768.0f; |
110 | | - } |
111 | | - } else { |
112 | | - for (uint64_t i = 0; i < n; i++) { |
113 | | - pcmf32[i] = float((int32_t)pcm16[2*i] + pcm16[2*i + 1])/65536.0f; |
114 | | - } |
115 | | - } |
116 | | - |
117 | | - if (rwp->diarize) { |
118 | | - // convert to stereo, float |
119 | | - pcmf32s.resize(2); |
120 | | - |
121 | | - pcmf32s[0].resize(n); |
122 | | - pcmf32s[1].resize(n); |
123 | | - for (uint64_t i = 0; i < n; i++) { |
124 | | - pcmf32s[0][i] = float(pcm16[2*i])/32768.0f; |
125 | | - pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f; |
126 | | - } |
127 | | - } |
| 49 | + if (!read_audio_data(fname_inp, pcmf32, pcmf32s, rwp->diarize)) { |
| 50 | + fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str()); |
| 51 | + return self; |
128 | 52 | } |
129 | 53 | { |
130 | 54 | static bool is_aborted = false; // NOTE: this should be atomic to avoid data race |
|
0 commit comments