|
| 1 | +#include <filesystem> |
| 2 | +#include <algorithm> |
1 | 3 | #include "common.h"
|
2 | 4 | #include "common-whisper.h"
|
3 | 5 |
|
@@ -29,6 +31,37 @@ static void replace_all(std::string & s, const std::string & search, const std::
|
29 | 31 | s.insert(pos, replace);
|
30 | 32 | }
|
31 | 33 | }
|
// Helper: pre-flight check of an input file's extension before decoding.
//
// Returns true when decoding should be attempted:
//   - "-" (read from stdin),
//   - a name with no extension, including a bare trailing dot ("audio."),
//   - a name ending in .wav / .mp3 / .flac / .ogg (ASCII case-insensitive),
//   - a name whose path cannot be parsed (the decoder gets the final say).
// Returns false, after printing a diagnostic and an ffmpeg conversion hint to
// stderr, when the name carries any other extension.
static bool validate_audio_extension(const std::string & fname_inp) {
    if (fname_inp == "-") {
        return true; // allow stdin
    }

    std::string ext;
    try {
        ext = std::filesystem::path(fname_inp).extension().string();
    } catch (...) {
        // best effort: if path parsing/conversion fails, let the decoder try anyway
        return true;
    }

    // ASCII-only case folding: the supported extensions are plain ASCII, so this
    // keeps the comparison independent of the process locale.
    std::transform(ext.begin(), ext.end(), ext.begin(), [](char c) {
        return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
    });

    // "" -> no extension; "." -> trailing dot with nothing after it. Neither says
    // anything about the format, so defer to the decoder in both cases.
    if (ext.empty() || ext == ".") {
        return true;
    }

    // keep in sync with usage text
    const bool ext_supported =
        ext == ".wav" || ext == ".mp3" || ext == ".flac" || ext == ".ogg";
    if (ext_supported) {
        return true;
    }

    fprintf(stderr,
            "error: unsupported audio extension '%s' for '%s'.\n"
            "supported: flac, mp3, ogg, wav.\n"
            "hint: convert with ffmpeg, e.g.:\n"
            "  ffmpeg -i \"%s\" -ar 16000 -ac 1 -c:a pcm_s16le out.wav\n",
            ext.c_str(), fname_inp.c_str(), fname_inp.c_str());
    return false;
}
| 64 | + |
32 | 65 |
|
33 | 66 | // command-line parameters
|
34 | 67 | struct whisper_params {
|
@@ -1051,8 +1084,13 @@ int main(int argc, char ** argv) {
|
1051 | 1084 | }
|
1052 | 1085 | }
|
1053 | 1086 |
|
| 1087 | + bool processed_any = false; |
1054 | 1088 | for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
|
1055 | 1089 | const auto & fname_inp = params.fname_inp[f];
|
| 1090 | + if (!validate_audio_extension(fname_inp)) { |
| 1091 | + continue; |
| 1092 | + } |
| 1093 | + |
1056 | 1094 | struct fout_factory {
|
1057 | 1095 | std::string fname_out;
|
1058 | 1096 | const size_t basename_length;
|
@@ -1105,10 +1143,15 @@ int main(int argc, char ** argv) {
|
1105 | 1143 | std::vector<float> pcmf32; // mono-channel F32 PCM
|
1106 | 1144 | std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
1107 | 1145 |
|
1108 |
| - if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) { |
1109 |
| - fprintf(stderr, "error: failed to read audio file '%s'\n", fname_inp.c_str()); |
1110 |
| - continue; |
1111 |
| - } |
| 1146 | +if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) { |
| 1147 | + fprintf(stderr, |
| 1148 | + "error: failed to decode audio from '%s'.\n" |
| 1149 | + "Make sure the file is not corrupted and has one of: flac, mp3, ogg, wav.\n" |
| 1150 | + "If you still hit this, convert to a standard WAV with:\n" |
| 1151 | + " ffmpeg -i \"%s\" -ar 16000 -ac 1 -c:a pcm_s16le out.wav\n", |
| 1152 | + fname_inp.c_str(), fname_inp.c_str()); |
| 1153 | + continue; |
| 1154 | +} |
1112 | 1155 |
|
1113 | 1156 | if (!whisper_is_multilingual(ctx)) {
|
1114 | 1157 | if (params.language != "en" || params.translate) {
|
@@ -1258,6 +1301,8 @@ int main(int argc, char ** argv) {
|
1258 | 1301 | fprintf(stderr, "%s: failed to process audio\n", argv[0]);
|
1259 | 1302 | return 10;
|
1260 | 1303 | }
|
| 1304 | + processed_any = true; |
| 1305 | + |
1261 | 1306 | }
|
1262 | 1307 |
|
1263 | 1308 | // output stuff
|
@@ -1286,7 +1331,7 @@ int main(int argc, char ** argv) {
|
1286 | 1331 | }
|
1287 | 1332 | }
|
1288 | 1333 |
|
1289 |
| - if (!params.no_prints) { |
| 1334 | + if (processed_any && !params.no_prints) { |
1290 | 1335 | whisper_print_timings(ctx);
|
1291 | 1336 | }
|
1292 | 1337 | whisper_free(ctx);
|
|
0 commit comments