@@ -251,7 +251,7 @@ static std::vector<std::string> get_words(const std::string &txt) {
251
251
252
252
// command-list mode
253
253
// guide the transcription to match the most likely command from a provided list
254
- static int process_command_list (struct whisper_context * ctx, audio_async &audio, const whisper_params &params) {
254
+ static int process_command_list (struct whisper_context * ctx, audio_async &audio, const whisper_params &params, std::ofstream &fout ) {
255
255
fprintf (stderr, " \n " );
256
256
fprintf (stderr, " %s: guided mode\n " , __func__);
257
257
@@ -444,12 +444,16 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio
444
444
445
445
const float prob = probs_id[0 ].first ;
446
446
const int index = probs_id[0 ].second ;
447
+ const char * best_command = allowed_commands[index].c_str ();
447
448
448
449
fprintf (stdout, " \n " );
449
450
fprintf (stdout, " %s: detected command: %s%s%s | p = %f | t = %d ms\n " , __func__,
450
- " \033 [1m" , allowed_commands[index]. c_str () , " \033 [0m" , prob,
451
+ " \033 [1m" , best_command , " \033 [0m" , prob,
451
452
(int ) std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count ());
452
453
fprintf (stdout, " \n " );
454
+ if (fout.is_open ()) {
455
+ fout << best_command << std::endl;
456
+ }
453
457
}
454
458
}
455
459
@@ -462,7 +466,7 @@ static int process_command_list(struct whisper_context * ctx, audio_async &audio
462
466
463
467
// always-prompt mode
464
468
// transcribe the voice into text after valid prompt
465
- static int always_prompt_transcription (struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
469
+ static int always_prompt_transcription (struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout ) {
466
470
bool is_running = true ;
467
471
bool ask_prompt = true ;
468
472
@@ -528,6 +532,9 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async
528
532
529
533
if ((sim > 0 .7f ) && (command.size () > 0 )) {
530
534
fprintf (stdout, " %s: Command '%s%s%s', (t = %d ms)\n " , __func__, " \033 [1m" , command.c_str (), " \033 [0m" , (int ) t_ms);
535
+ if (fout.is_open ()) {
536
+ fout << command << std::endl;
537
+ }
531
538
}
532
539
533
540
fprintf (stdout, " \n " );
@@ -542,7 +549,7 @@ static int always_prompt_transcription(struct whisper_context * ctx, audio_async
542
549
543
550
// general-purpose mode
544
551
// freely transcribe the voice into text
545
- static int process_general_transcription (struct whisper_context * ctx, audio_async & audio, const whisper_params & params) {
552
+ static int process_general_transcription (struct whisper_context * ctx, audio_async & audio, const whisper_params & params, std::ofstream & fout ) {
546
553
bool is_running = true ;
547
554
bool have_prompt = false ;
548
555
bool ask_prompt = true ;
@@ -662,8 +669,10 @@ static int process_general_transcription(struct whisper_context * ctx, audio_asy
662
669
} else {
663
670
// cut the prompt from the decoded text
664
671
const std::string command = ::trim (txt.substr (best_len));
665
-
666
672
fprintf (stdout, " %s: Command '%s%s%s', (t = %d ms)\n " , __func__, " \033 [1m" , command.c_str (), " \033 [0m" , (int ) t_ms);
673
+ if (fout.is_open ()) {
674
+ fout << command << std::endl;
675
+ }
667
676
}
668
677
669
678
fprintf (stdout, " \n " );
@@ -759,13 +768,22 @@ int main(int argc, char ** argv) {
759
768
}
760
769
}
761
770
771
+ std::ofstream fout;
772
+ if (params.fname_out .length () > 0 ) {
773
+ fout.open (params.fname_out );
774
+ if (!fout.is_open ()) {
775
+ fprintf (stderr, " %s: failed to open output file '%s'!\n " , __func__, params.fname_out .c_str ());
776
+ return 1 ;
777
+ }
778
+ }
779
+
762
780
if (ret_val == 0 ) {
763
781
if (!params.commands .empty ()) {
764
- ret_val = process_command_list (ctx, audio, params);
782
+ ret_val = process_command_list (ctx, audio, params, fout );
765
783
} else if (!params.prompt .empty () && params.grammar_parsed .rules .empty ()) {
766
- ret_val = always_prompt_transcription (ctx, audio, params);
784
+ ret_val = always_prompt_transcription (ctx, audio, params, fout );
767
785
} else {
768
- ret_val = process_general_transcription (ctx, audio, params);
786
+ ret_val = process_general_transcription (ctx, audio, params, fout );
769
787
}
770
788
}
771
789
0 commit comments