Skip to content

Commit f389d7e

Browse files
authored
examples : add --print-confidence option to cli (#3150)
* examples : add --print-confidence option to cli

This commit adds a new command-line option `--print-confidence` to the whisper-cli. When enabled, this option prints the confidence level of each token in the transcribed text using ANSI formatting codes.

The confidence levels are represented using different styles:

```console
main: confidence: highlighted (low confidence), underlined (medium), dim (high confidence)
```

Refs: #3135
1 parent 96d791a commit f389d7e

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

examples/cli/cli.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ struct whisper_params {
7070
bool no_prints = false;
7171
bool print_special = false;
7272
bool print_colors = false;
73+
bool print_confidence= false;
7374
bool print_progress = false;
7475
bool no_timestamps = false;
7576
bool log_score = false;
@@ -179,6 +180,7 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
179180
else if (arg == "-np" || arg == "--no-prints") { params.no_prints = true; }
180181
else if (arg == "-ps" || arg == "--print-special") { params.print_special = true; }
181182
else if (arg == "-pc" || arg == "--print-colors") { params.print_colors = true; }
183+
else if ( arg == "--print-confidence"){ params.print_confidence= true; }
182184
else if (arg == "-pp" || arg == "--print-progress") { params.print_progress = true; }
183185
else if (arg == "-nt" || arg == "--no-timestamps") { params.no_timestamps = true; }
184186
else if (arg == "-l" || arg == "--language") { params.language = whisper_param_turn_lowercase(ARGV_NEXT); }
@@ -257,6 +259,7 @@ static void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params
257259
fprintf(stderr, " -np, --no-prints [%-7s] do not print anything other than the results\n", params.no_prints ? "true" : "false");
258260
fprintf(stderr, " -ps, --print-special [%-7s] print special tokens\n", params.print_special ? "true" : "false");
259261
fprintf(stderr, " -pc, --print-colors [%-7s] print colors\n", params.print_colors ? "true" : "false");
262+
fprintf(stderr, " --print-confidence [%-7s] print confidence\n", params.print_confidence ? "true" : "false");
260263
fprintf(stderr, " -pp, --print-progress [%-7s] print progress\n", params.print_progress ? "true" : "false");
261264
fprintf(stderr, " -nt, --no-timestamps [%-7s] do not print timestamps\n", params.no_timestamps ? "true" : "false");
262265
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language ('auto' for auto-detect)\n", params.language.c_str());
@@ -386,6 +389,26 @@ static void whisper_print_segment_callback(struct whisper_context * ctx, struct
386389

387390
printf("%s%s%s%s", speaker.c_str(), k_colors[col].c_str(), text, "\033[0m");
388391
}
392+
} else if (params.print_confidence) {
393+
for (int j = 0; j < whisper_full_n_tokens(ctx, i); ++j) {
394+
if (params.print_special == false) {
395+
const whisper_token id = whisper_full_get_token_id(ctx, i, j);
396+
if (id >= whisper_token_eot(ctx)) {
397+
continue;
398+
}
399+
}
400+
401+
const char * text = whisper_full_get_token_text(ctx, i, j);
402+
const float p = whisper_full_get_token_p (ctx, i, j);
403+
404+
int style_idx = 2; // High confidence - dim
405+
if (p < 0.33) {
406+
style_idx = 0; // Low confidence - inverse (highlighted)
407+
} else if (p < 0.66) {
408+
style_idx = 1; // Medium confidence - underlined
409+
}
410+
printf("%s%s%s%s", speaker.c_str(), k_styles[style_idx].c_str(), text, "\033[0m");
411+
}
389412
} else {
390413
const char * text = whisper_full_get_segment_text(ctx, i);
391414

@@ -1115,6 +1138,8 @@ int main(int argc, char ** argv) {
11151138

11161139
if (params.print_colors) {
11171140
fprintf(stderr, "%s: color scheme: red (low confidence), yellow (medium), green (high confidence)\n", __func__);
1141+
} else if (params.print_confidence) {
1142+
fprintf(stderr, "%s: confidence: highlighted (low confidence), underlined (medium), dim (high confidence)\n", __func__);
11181143
}
11191144
fprintf(stderr, "\n");
11201145
}

examples/common.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,26 @@ const std::vector<std::string> k_colors = {
294294
set_xterm256_foreground( 78, 178, 101),
295295
};
296296

297+
// ANSI formatting codes
298+
// Returns the ANSI escape sequence "\033[7m" (SGR 7, reverse video) as a string.
// The sequence only switches the style on; the caller must emit a reset to end it.
static std::string set_inverse() {
299+
return "\033[7m";
300+
}
301+
302+
// Returns the ANSI escape sequence "\033[4m" (SGR 4, underline) as a string.
// The sequence only switches the style on; the caller must emit a reset to end it.
static std::string set_underline() {
303+
return "\033[4m";
304+
}
305+
306+
// Returns the ANSI escape sequence "\033[2m" (SGR 2, dim/faint) as a string.
// The sequence only switches the style on; the caller must emit a reset to end it.
static std::string set_dim() {
307+
return "\033[2m";
308+
}
309+
310+
// Style scheme for different confidence levels
311+
// Table of ANSI style prefixes, ordered from low to high confidence:
// index 0 = low, index 1 = medium, index 2 = high.
const std::vector<std::string> k_styles = {
312+
set_inverse(), // Low confidence - inverse (highlighted)
313+
set_underline(), // Medium confidence - underlined
314+
set_dim(), // High confidence - dim
315+
};
316+
297317
//
298318
// Other utils
299319
//

0 commit comments

Comments
 (0)