@@ -76,6 +76,7 @@ struct whisper_params {
7676 bool no_timestamps = false ;
7777 bool use_gpu = true ;
7878 bool flash_attn = false ;
79+ bool suppress_non_speech_tokens = false ;
7980
8081 std::string language = " en" ;
8182 std::string prompt = " " ;
@@ -135,6 +136,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
135136 fprintf (stderr, " --request-path PATH, [%-7s] Request path for all requests\n " , sparams.request_path .c_str ());
136137 fprintf (stderr, " --inference-path PATH, [%-7s] Inference path for all requests\n " , sparams.inference_path .c_str ());
// Every usage entry must end with '\n'; without it the next option's help text is printed on the same line as --convert.
137138 fprintf (stderr, " --convert, [%-7s] Convert audio to WAV, requires ffmpeg on the server\n" , sparams.ffmpeg_converter ? " true" : " false" );
139+ fprintf (stderr, " -sns, --suppress-non-speech [%-7s] suppress non-speech tokens\n " , params.suppress_non_speech_tokens ? " true" : " false" );
138140 fprintf (stderr, " \n " );
139141}
140142
@@ -179,6 +181,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
179181 else if (arg == " -dtw" || arg == " --dtw" ) { params.dtw = argv[++i]; }
180182 else if (arg == " -ng" || arg == " --no-gpu" ) { params.use_gpu = false ; }
181183 else if (arg == " -fa" || arg == " --flash-attn" ) { params.flash_attn = true ; }
184+ else if (arg == " -sns" || arg == " --suppress-non-speech" ) { params.suppress_non_speech_tokens = true ; }
182185 // server params
183186 else if ( arg == " --port" ) { sparams.port = std::stoi (argv[++i]); }
184187 else if ( arg == " --host" ) { sparams.hostname = argv[++i]; }
@@ -472,6 +475,10 @@ void get_req_parameters(const Request & req, whisper_params & params)
472475 {
473476 params.temperature_inc = std::stof (req.get_file_value (" temperature_inc" ).content );
474477 }
478+ if (req.has_file (" suppress_non_speech" ))
479+ {
480+ params.suppress_non_speech_tokens = parse_str_to_bool (req.get_file_value (" suppress_non_speech" ).content );
481+ }
475482}
476483
477484} // namespace
@@ -786,6 +793,8 @@ int main(int argc, char ** argv) {
786793 wparams.no_timestamps = params.no_timestamps ;
787794 wparams.token_timestamps = !params.no_timestamps && params.response_format == vjson_format;
788795
796+ wparams.suppress_non_speech_tokens = params.suppress_non_speech_tokens ;
797+
789798 whisper_print_user_data user_data = { &params, &pcmf32s, 0 };
790799
791800 // this callback is called on each new segment
0 commit comments