Skip to content

Commit a892322

Browse files
committed
cli: extract validate_audio_extension(); clearer decode-failed hint; timings only on success
1 parent fc45bb8 commit a892322

File tree

1 file changed

+50
-5
lines changed

1 file changed

+50
-5
lines changed

examples/cli/cli.cpp

Lines changed: 50 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#include <filesystem>
2+
#include <algorithm>
13
#include "common.h"
24
#include "common-whisper.h"
35

@@ -29,6 +31,37 @@ static void replace_all(std::string & s, const std::string & search, const std::
2931
s.insert(pos, replace);
3032
}
3133
}
34+
// Pre-flight check of an input path's extension, run before any decode attempt.
//
// @param fname_inp  input path as given on the command line, or "-" for stdin
// @return true when decoding should be attempted, i.e. when
//           - fname_inp is "-" (stdin),
//           - the build defines WHISPER_FFMPEG,
//           - the path has no extension,
//           - the path could not be parsed (the decoder gets to try anyway), or
//           - the lower-cased extension is one of .wav / .mp3 / .flac / .ogg;
//         false otherwise, after printing an error and an ffmpeg conversion
//         hint to stderr.
static bool validate_audio_extension(const std::string & fname_inp) {
    if (fname_inp == "-") {
        return true; // allow stdin
    }

#ifdef WHISPER_FFMPEG
    // FFmpeg-enabled builds can decode formats beyond the built-in list
    // (e.g. Opus, AAC), so rejecting on extension alone would block inputs
    // the decoder is able to handle.
    // NOTE(review): assumes WHISPER_FFMPEG is visible to this translation unit - confirm in the build files.
    return true;
#else
    std::string ext;
    try {
        ext = std::filesystem::path(fname_inp).extension().string();
        // unsigned char keeps std::tolower well-defined for bytes >= 0x80
        std::transform(ext.begin(), ext.end(), ext.begin(),
                       [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    } catch (...) {
        // if path parsing fails, let the decoder try anyway
        return true;
    }

    // keep in sync with usage text
    const bool supported =
        ext == ".wav" || ext == ".mp3" || ext == ".flac" || ext == ".ogg";

    // a path without any extension is passed through to the decoder
    if (ext.empty() || supported) {
        return true;
    }

    fprintf(stderr,
            "error: unsupported audio extension '%s' for '%s'.\n"
            "supported: flac, mp3, ogg, wav.\n"
            "hint: convert with ffmpeg, e.g.:\n"
            "  ffmpeg -i \"%s\" -ar 16000 -ac 1 -c:a pcm_s16le out.wav\n",
            ext.c_str(), fname_inp.c_str(), fname_inp.c_str());
    return false;
#endif
}
64+
3265

3366
// command-line parameters
3467
struct whisper_params {
@@ -1051,8 +1084,13 @@ int main(int argc, char ** argv) {
10511084
}
10521085
}
10531086

1087+
bool processed_any = false;
10541088
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
10551089
const auto & fname_inp = params.fname_inp[f];
1090+
if (!validate_audio_extension(fname_inp)) {
1091+
continue;
1092+
}
1093+
10561094
struct fout_factory {
10571095
std::string fname_out;
10581096
const size_t basename_length;
@@ -1105,10 +1143,15 @@ int main(int argc, char ** argv) {
11051143
std::vector<float> pcmf32; // mono-channel F32 PCM
11061144
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
11071145

1108-
if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) {
1109-
fprintf(stderr, "error: failed to read audio file '%s'\n", fname_inp.c_str());
1110-
continue;
1111-
}
1146+
if (!::read_audio_data(fname_inp, pcmf32, pcmf32s, params.diarize)) {
1147+
fprintf(stderr,
1148+
"error: failed to decode audio from '%s'.\n"
1149+
"Make sure the file is not corrupted and has one of: flac, mp3, ogg, wav.\n"
1150+
"If you still hit this, convert to a standard WAV with:\n"
1151+
" ffmpeg -i \"%s\" -ar 16000 -ac 1 -c:a pcm_s16le out.wav\n",
1152+
fname_inp.c_str(), fname_inp.c_str());
1153+
continue;
1154+
}
11121155

11131156
if (!whisper_is_multilingual(ctx)) {
11141157
if (params.language != "en" || params.translate) {
@@ -1258,6 +1301,8 @@ int main(int argc, char ** argv) {
12581301
fprintf(stderr, "%s: failed to process audio\n", argv[0]);
12591302
return 10;
12601303
}
1304+
processed_any = true;
1305+
12611306
}
12621307

12631308
// output stuff
@@ -1286,7 +1331,7 @@ int main(int argc, char ** argv) {
12861331
}
12871332
}
12881333

1289-
if (!params.no_prints) {
1334+
if (processed_any && !params.no_prints) {
12901335
whisper_print_timings(ctx);
12911336
}
12921337
whisper_free(ctx);

0 commit comments

Comments
 (0)