99#include < cstdio>
1010#include < string>
1111#include < thread>
12+ #include < utility>
1213#include < vector>
1314#include < cstring>
1415
@@ -379,15 +380,7 @@ static void whisper_print_segment_callback(struct whisper_context * ctx, struct
379380 }
380381}
381382
382- static bool output_txt (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
383- std::ofstream fout (fname);
384- if (!fout.is_open ()) {
385- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
386- return false ;
387- }
388-
389- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
390-
383+ static void output_txt (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
391384 const int n_segments = whisper_full_n_segments (ctx);
392385 for (int i = 0 ; i < n_segments; ++i) {
393386 const char * text = whisper_full_get_segment_text (ctx, i);
@@ -402,19 +395,9 @@ static bool output_txt(struct whisper_context * ctx, const char * fname, const w
402395
403396 fout << speaker << text << " \n " ;
404397 }
405-
406- return true ;
407398}
408399
409- static bool output_vtt (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
410- std::ofstream fout (fname);
411- if (!fout.is_open ()) {
412- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
413- return false ;
414- }
415-
416- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
417-
400+ static void output_vtt (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
418401 fout << " WEBVTT\n\n " ;
419402
420403 const int n_segments = whisper_full_n_segments (ctx);
@@ -434,19 +417,9 @@ static bool output_vtt(struct whisper_context * ctx, const char * fname, const w
434417 fout << to_timestamp (t0) << " --> " << to_timestamp (t1) << " \n " ;
435418 fout << speaker << text << " \n\n " ;
436419 }
437-
438- return true ;
439420}
440421
441- static bool output_srt (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
442- std::ofstream fout (fname);
443- if (!fout.is_open ()) {
444- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
445- return false ;
446- }
447-
448- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
449-
422+ static void output_srt (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
450423 const int n_segments = whisper_full_n_segments (ctx);
451424 for (int i = 0 ; i < n_segments; ++i) {
452425 const char * text = whisper_full_get_segment_text (ctx, i);
@@ -463,8 +436,6 @@ static bool output_srt(struct whisper_context * ctx, const char * fname, const w
463436 fout << to_timestamp (t0, true ) << " --> " << to_timestamp (t1, true ) << " \n " ;
464437 fout << speaker << text << " \n\n " ;
465438 }
466-
467- return true ;
468439}
469440
470441static char * escape_double_quotes_and_backslashes (const char * str) {
@@ -530,15 +501,7 @@ static char * escape_double_quotes_in_csv(const char * str) {
530501 return escaped;
531502}
532503
533- static bool output_csv (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
534- std::ofstream fout (fname);
535- if (!fout.is_open ()) {
536- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
537- return false ;
538- }
539-
540- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
541-
504+ static void output_csv (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
542505 const int n_segments = whisper_full_n_segments (ctx);
543506 fout << " start,end," ;
544507 if (params.diarize && pcmf32s.size () == 2 )
@@ -561,14 +524,9 @@ static bool output_csv(struct whisper_context * ctx, const char * fname, const w
561524 }
562525 fout << " \" " << text_escaped << " \"\n " ;
563526 }
564-
565- return true ;
566527}
567528
568- static bool output_score (struct whisper_context * ctx, const char * fname, const whisper_params & /* params*/ , std::vector<std::vector<float >> /* pcmf32s*/ ) {
569- std::ofstream fout (fname);
570- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
571-
529+ static void output_score (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & /* params*/ , std::vector<std::vector<float >> /* pcmf32s*/ ) {
572530 const int n_segments = whisper_full_n_segments (ctx);
573531 // fprintf(stderr,"segments: %d\n",n_segments);
574532 for (int i = 0 ; i < n_segments; ++i) {
@@ -581,16 +539,14 @@ static bool output_score(struct whisper_context * ctx, const char * fname, const
581539 // fprintf(stderr,"token: %s %f\n",token,probability);
582540 }
583541 }
584- return true ;
585542}
586543
587- static bool output_json (
544+ static void output_json (
588545 struct whisper_context * ctx,
589- const char * fname ,
546+ std::ofstream & fout ,
590547 const whisper_params & params,
591- std::vector<std::vector<float >> pcmf32s,
592- bool full) {
593- std::ofstream fout (fname);
548+ std::vector<std::vector<float >> pcmf32s) {
549+ const bool full = params.output_jsn_full ;
594550 int indent = 0 ;
595551
596552 auto doindent = [&]() {
@@ -670,12 +626,6 @@ static bool output_json(
670626 end_obj (end);
671627 };
672628
673- if (!fout.is_open ()) {
674- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
675- return false ;
676- }
677-
678- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
679629 start_obj (nullptr );
680630 value_s (" systeminfo" , whisper_print_system_info (), false );
681631 start_obj (" model" );
@@ -749,17 +699,12 @@ static bool output_json(
749699
750700 end_arr (true );
751701 end_obj (true );
752- return true ;
753702}
754703
755704// karaoke video generation
756705// outputs a bash script that uses ffmpeg to generate a video with the subtitles
757706// TODO: font parameter adjustments
758- static bool output_wts (struct whisper_context * ctx, const char * fname, const char * fname_inp, const whisper_params & params, float t_sec, std::vector<std::vector<float >> pcmf32s) {
759- std::ofstream fout (fname);
760-
761- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
762-
707+ static bool output_wts (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s, const char * fname_inp, float t_sec, const char * fname_out) {
763708 static const char * font = params.font_path .c_str ();
764709
765710 std::ifstream fin (font);
@@ -875,20 +820,12 @@ static bool output_wts(struct whisper_context * ctx, const char * fname, const c
875820
876821 fout.close ();
877822
878- fprintf (stderr, " %s: run 'source %s' to generate karaoke video\n " , __func__, fname );
823+ fprintf (stderr, " # %s: run 'source %s' to generate karaoke video\n " , __func__, fname_out );
879824
880825 return true ;
881826}
882827
883- static bool output_lrc (struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
884- std::ofstream fout (fname);
885- if (!fout.is_open ()) {
886- fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname);
887- return false ;
888- }
889-
890- fprintf (stderr, " %s: saving output to '%s'\n " , __func__, fname);
891-
828+ static void output_lrc (struct whisper_context * ctx, std::ofstream & fout, const whisper_params & params, std::vector<std::vector<float >> pcmf32s) {
892829 fout << " [by:whisper.cpp]\n " ;
893830
894831 const int n_segments = whisper_full_n_segments (ctx);
@@ -916,8 +853,6 @@ static bool output_lrc(struct whisper_context * ctx, const char * fname, const w
916853
917854 fout << ' [' << timestamp_lrc << ' ]' << speaker << text << " \n " ;
918855 }
919-
920- return true ;
921856}
922857
923858
@@ -1066,8 +1001,52 @@ int main(int argc, char ** argv) {
10661001 }
10671002
10681003 for (int f = 0 ; f < (int ) params.fname_inp .size (); ++f) {
1069- const auto fname_inp = params.fname_inp [f];
1070- const auto fname_out = f < (int ) params.fname_out .size () && !params.fname_out [f].empty () ? params.fname_out [f] : params.fname_inp [f];
1004+ const auto & fname_inp = params.fname_inp [f];
1005+ struct fout_factory {
1006+ std::string fname_out;
1007+ const size_t basename_length;
1008+ const bool is_stdout;
1009+ bool used_stdout;
1010+ decltype (whisper_print_segment_callback) * const print_segment_callback;
1011+ std::ofstream fout;
1012+
1013+ fout_factory (const std::string & fname_out_, const std::string & fname_inp, whisper_params & params) :
1014+ fname_out{!fname_out_.empty () ? fname_out_ : fname_inp},
1015+ basename_length{fname_out.size ()},
1016+ is_stdout{fname_out == " -" },
1017+ used_stdout{},
1018+ print_segment_callback{is_stdout ? nullptr : whisper_print_segment_callback} {
1019+ if (!print_segment_callback) {
1020+ params.print_progress = false ;
1021+ }
1022+ }
1023+
1024+ bool open (const char * ext, const char * function) {
1025+ if (is_stdout) {
1026+ if (std::exchange (used_stdout, true )) {
1027+ fprintf (stderr, " warning: Not appending multiple file formats to stdout\n " );
1028+ return false ;
1029+ }
1030+ #ifdef _WIN32
1031+ fout = std::ofstream{" CON" };
1032+ #else
1033+ fout = std::ofstream{" /dev/stdout" };
1034+ #endif
1035+ // Not using fprintf stderr here because it might equal stdout
1036+ // Also assuming /dev is mounted
1037+ return true ;
1038+ }
1039+ fname_out.resize (basename_length);
1040+ fname_out += ext;
1041+ fout = std::ofstream{fname_out};
1042+ if (!fout.is_open ()) {
1043+ fprintf (stderr, " %s: failed to open '%s' for writing\n " , __func__, fname_out.c_str ());
1044+ return false ;
1045+ }
1046+ fprintf (stderr, " %s: saving output to '%s'\n " , function, fname_out.c_str ());
1047+ return true ;
1048+ }
1049+ } fout_factory{f < (int ) params.fname_out .size () ? params.fname_out [f] : " " , fname_inp, params};
10711050
10721051 std::vector<float > pcmf32; // mono-channel F32 PCM
10731052 std::vector<std::vector<float >> pcmf32s; // stereo-channel F32 PCM
@@ -1172,7 +1151,7 @@ int main(int argc, char ** argv) {
11721151
11731152 // this callback is called on each new segment
11741153 if (!wparams.print_realtime ) {
1175- wparams.new_segment_callback = whisper_print_segment_callback ;
1154+ wparams.new_segment_callback = fout_factory. print_segment_callback ;
11761155 wparams.new_segment_callback_user_data = &user_data;
11771156 }
11781157
@@ -1214,54 +1193,26 @@ int main(int argc, char ** argv) {
12141193
12151194 // output stuff
12161195 {
1217- printf (" \n " );
1218-
1219- // output to text file
1220- if (params.output_txt ) {
1221- const auto fname_txt = fname_out + " .txt" ;
1222- output_txt (ctx, fname_txt.c_str (), params, pcmf32s);
1223- }
1224-
1225- // output to VTT file
1226- if (params.output_vtt ) {
1227- const auto fname_vtt = fname_out + " .vtt" ;
1228- output_vtt (ctx, fname_vtt.c_str (), params, pcmf32s);
1229- }
1230-
1231- // output to SRT file
1232- if (params.output_srt ) {
1233- const auto fname_srt = fname_out + " .srt" ;
1234- output_srt (ctx, fname_srt.c_str (), params, pcmf32s);
1235- }
1236-
1237- // output to WTS file
1238- if (params.output_wts ) {
1239- const auto fname_wts = fname_out + " .wts" ;
1240- output_wts (ctx, fname_wts.c_str (), fname_inp.c_str (), params, float (pcmf32.size () + 1000 )/WHISPER_SAMPLE_RATE, pcmf32s);
1241- }
1242-
1243- // output to CSV file
1244- if (params.output_csv ) {
1245- const auto fname_csv = fname_out + " .csv" ;
1246- output_csv (ctx, fname_csv.c_str (), params, pcmf32s);
1247- }
1248-
1249- // output to JSON file
1250- if (params.output_jsn ) {
1251- const auto fname_jsn = fname_out + " .json" ;
1252- output_json (ctx, fname_jsn.c_str (), params, pcmf32s, params.output_jsn_full );
1253- }
1254-
1255- // output to LRC file
1256- if (params.output_lrc ) {
1257- const auto fname_lrc = fname_out + " .lrc" ;
1258- output_lrc (ctx, fname_lrc.c_str (), params, pcmf32s);
1259- }
1260-
1261- // output to score file
1262- if (params.log_score ) {
1263- const auto fname_score = fname_out + " .score.txt" ;
1264- output_score (ctx, fname_score.c_str (), params, pcmf32s);
1196+ // macros to stringify function name
1197+ #define output_func (func, ext, param, ...) if (param && fout_factory.open(ext, #func)) {\
1198+ func (ctx, fout_factory.fout , params, __VA_ARGS__); \
1199+ }
1200+ #define output_ext (ext, ...) output_func(output_##ext, " ." #ext, params.output_##ext, __VA_ARGS__)
1201+
1202+ output_ext (txt, pcmf32s);
1203+ output_ext (vtt, pcmf32s);
1204+ output_ext (srt, pcmf32s);
1205+ output_ext (wts, pcmf32s, fname_inp.c_str (), float (pcmf32.size () + 1000 )/WHISPER_SAMPLE_RATE, fout_factory.fname_out .c_str ());
1206+ output_ext (csv, pcmf32s);
1207+ output_func (output_json, " .json" , params.output_jsn , pcmf32s);
1208+ output_ext (lrc, pcmf32s);
1209+ output_func (output_score, " .score.txt" , params.log_score , pcmf32s);
1210+
1211+ #undef output_ext
1212+ #undef output_func
1213+
1214+ if (fout_factory.is_stdout && !fout_factory.used_stdout ) {
1215+ fprintf (stderr, " warning: '--output-file -' used without any other '--output-*'" );
12651216 }
12661217 }
12671218 }
0 commit comments