From 4be3da541ff5fe508f02c403f3c78b6b550761ac Mon Sep 17 00:00:00 2001 From: Sacha Arbonel Date: Wed, 23 Jul 2025 14:53:27 +0200 Subject: [PATCH 1/3] examples/server: add warmup file parameter for model initialization --- examples/server/server.cpp | 40 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 901f65f6c35..b243d019c39 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -129,6 +129,9 @@ struct whisper_params { float vad_max_speech_duration_s = FLT_MAX; int vad_speech_pad_ms = 30; float vad_samples_overlap = 0.1f; + + // Warmup parameters + std::string warmup_file = ""; }; void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params, const server_params& sparams) { @@ -192,6 +195,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para std::to_string(params.vad_max_speech_duration_s).c_str()); fprintf(stderr, " -vp N, --vad-speech-pad-ms N [%-7d] VAD speech padding (extend segments)\n", params.vad_speech_pad_ms); fprintf(stderr, " -vo N, --vad-samples-overlap N [%-7.2f] VAD samples overlap (seconds between segments)\n", params.vad_samples_overlap); + fprintf(stderr, " -wf PATH, --warmup-file PATH [%-7s] path to audio file for model warmup\n", params.warmup_file.c_str()); fprintf(stderr, "\n"); } @@ -258,6 +262,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve else if (arg == "-vmsd" || arg == "--vad-max-speech-duration-s") { params.vad_max_speech_duration_s = std::stof(argv[++i]); } else if (arg == "-vp" || arg == "--vad-speech-pad-ms") { params.vad_speech_pad_ms = std::stoi(argv[++i]); } else if (arg == "-vo" || arg == "--vad-samples-overlap") { params.vad_samples_overlap = std::stof(argv[++i]); } + else if (arg == "-wf" || arg == "--warmup-file") { params.warmup_file = argv[++i]; } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); 
whisper_print_usage(argc, argv, params, sparams); @@ -703,6 +708,41 @@ int main(int argc, char ** argv) { // initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr); + + // warmup model if warmup file is provided + if (!params.warmup_file.empty()) { + printf("Warming up model with audio file: %s\n", params.warmup_file.c_str()); + std::vector<float> pcmf32_warmup; + std::vector<std::vector<float>> pcmf32s_warmup; + + if (read_audio_data(params.warmup_file, pcmf32_warmup, pcmf32s_warmup, false)) { + whisper_full_params wparams = whisper_full_default_params(WHISPER_SAMPLING_GREEDY); + wparams.print_realtime = false; + wparams.print_progress = false; + wparams.print_timestamps = false; + wparams.print_special = false; + wparams.translate = false; + wparams.language = "en"; + wparams.n_threads = params.n_threads; + wparams.n_max_text_ctx = 128; + wparams.no_context = true; + wparams.single_segment = true; + wparams.audio_ctx = 768; + + const auto t_start = std::chrono::high_resolution_clock::now(); + + if (whisper_full_parallel(ctx, wparams, pcmf32_warmup.data(), pcmf32_warmup.size(), 1) == 0) { + const auto t_end = std::chrono::high_resolution_clock::now(); + const auto t_ms = std::chrono::duration_cast<std::chrono::milliseconds>(t_end - t_start).count(); + printf("Model warmup completed in %d ms\n", (int)t_ms); + } else { + fprintf(stderr, "warning: model warmup failed\n"); + } + } else { + fprintf(stderr, "warning: failed to read warmup audio file '%s'\n", params.warmup_file.c_str()); + } + } + state.store(SERVER_STATE_READY); From fa04e39716b9c2ba9f564fd1f3bbc14bccab6408 Mon Sep 17 00:00:00 2001 From: Sacha Arbonel Date: Wed, 23 Jul 2025 15:07:24 +0200 Subject: [PATCH 2/3] remove extra lines --- examples/server/server.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index b243d019c39..9b206883e57 100644 ---
a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -708,7 +708,6 @@ int main(int argc, char ** argv) { // initialize openvino encoder. this has no effect on whisper.cpp builds that don't have OpenVINO configured whisper_ctx_init_openvino_encoder(ctx, nullptr, params.openvino_encode_device.c_str(), nullptr); - // warmup model if warmup file is provided if (!params.warmup_file.empty()) { printf("Warming up model with audio file: %s\n", params.warmup_file.c_str()); @@ -742,7 +741,6 @@ int main(int argc, char ** argv) { fprintf(stderr, "warning: failed to read warmup audio file '%s'\n", params.warmup_file.c_str()); } } - state.store(SERVER_STATE_READY); From 3d170cbf979ced5ad00b6338523a02ef5afc615f Mon Sep 17 00:00:00 2001 From: Sacha Arbonel Date: Wed, 23 Jul 2025 15:12:00 +0200 Subject: [PATCH 3/3] Remove unnecessary blank line --- examples/server/server.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 9b206883e57..ea1a53c0bda 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -129,7 +129,6 @@ struct whisper_params { float vad_max_speech_duration_s = FLT_MAX; int vad_speech_pad_ms = 30; float vad_samples_overlap = 0.1f; - // Warmup parameters std::string warmup_file = ""; };