@@ -61,6 +61,7 @@ struct whisper_params {
6161 float logprob_thold = -1 .00f ;
6262 float temperature = 0 .00f ;
6363 float temperature_inc = 0 .20f ;
64+ float no_speech_thold = 0 .6f ;
6465
6566 bool debug_mode = false ;
6667 bool translate = false ;
@@ -137,6 +138,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
137138 fprintf (stderr, " --inference-path PATH, [%-7s] Inference path for all requests\n " , sparams.inference_path .c_str ());
138139 fprintf (stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server" , sparams.ffmpeg_converter ? " true" : " false" );
139140 fprintf (stderr, " -sns, --suppress-nst [%-7s] suppress non-speech tokens\n " , params.suppress_nst ? " true" : " false" );
141+ fprintf (stderr, " -nth N, --no-speech-thold N [%-7.2f] no speech threshold\n " , params.no_speech_thold );
140142 fprintf (stderr, " \n " );
141143}
142144
@@ -182,6 +184,8 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
182184 else if (arg == " -ng" || arg == " --no-gpu" ) { params.use_gpu = false ; }
183185 else if (arg == " -fa" || arg == " --flash-attn" ) { params.flash_attn = true ; }
184186 else if (arg == " -sns" || arg == " --suppress-nst" ) { params.suppress_nst = true ; }
187+ else if (arg == " -nth" || arg == " --no-speech-thold" ) { params.no_speech_thold = std::stof (argv[++i]); }
188+
185189 // server params
186190 else if ( arg == " --port" ) { sparams.port = std::stoi (argv[++i]); }
187191 else if ( arg == " --host" ) { sparams.hostname = argv[++i]; }
@@ -790,6 +794,7 @@ int main(int argc, char ** argv) {
790794 wparams.beam_search .beam_size = params.beam_size ;
791795
792796 wparams.temperature = params.temperature ;
797+ wparams.no_speech_thold = params.no_speech_thold ;
793798 wparams.temperature_inc = params.temperature_inc ;
794799 wparams.entropy_thold = params.entropy_thold ;
795800 wparams.logprob_thold = params.logprob_thold ;
@@ -942,7 +947,7 @@ int main(int argc, char ** argv) {
942947
943948 // TODO compression_ratio is not implemented yet
944949 // segment["compression_ratio"] = 0;
945- // segment["no_speech_prob"] = 0 ;
950+ segment[" no_speech_prob" ] = whisper_full_get_segment_no_speech_prob (ctx, i) ;
946951
947952 jres[" segments" ].push_back (segment);
948953 }
0 commit comments