From d4505252c7f2fe7cb550519ea7078b177864a135 Mon Sep 17 00:00:00 2001 From: Slavik Bogdanov Date: Tue, 15 Jul 2025 17:50:35 +0700 Subject: [PATCH 1/5] feat(stream): add pausable stdin commands for pause/resume functionality --- examples/stream/stream.cpp | 58 +++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 37b23886821..b716a499bfd 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -13,6 +13,8 @@ #include #include #include +#include +#include // command-line parameters struct whisper_params { @@ -37,6 +39,7 @@ struct whisper_params { bool save_audio = false; // save audio to wav file bool use_gpu = true; bool flash_attn = false; + bool pausable = false; std::string language = "en"; std::string model = "models/ggml-base.en.bin"; @@ -74,6 +77,7 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; } else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; } else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; } + else if (arg == "-p" || arg == "--pausable") { params.pausable = true; } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); @@ -112,6 +116,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false"); fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU inference\n", params.use_gpu ? "false" : "true"); fprintf(stderr, " -fa, --flash-attn [%-7s] flash attention during inference\n", params.flash_attn ? "true" : "false"); + fprintf(stderr, " --pausable [%-7s] allow stdin commands p,n (PAUSE)/(RESUME)\n", params.pausable ? "true" : "false"); fprintf(stderr, "\n"); } @@ -206,7 +211,9 @@ int main(int argc, char ** argv) { int n_iter = 0; - bool is_running = true; + std::atomic_bool is_running(true); + std::atomic_bool is_paused(false); + std::atomic_int control_state(0); // 1 - pause, 2 - resume std::ofstream fout; if (params.fname_out.length() > 0) { @@ -231,6 +238,26 @@ int main(int argc, char ** argv) { printf("[Start speaking]\n"); fflush(stdout); + std::thread control_thread; + if (params.pausable) { + control_thread = std::thread([&]() { + std::string line; + while (is_running) { + if (!std::getline(std::cin, line)) { + break; + } + + if (line == "p") { + control_state = 1; + } else if (line == "r") { + control_state = 2; + } else { + fprintf(stderr, "[ERROR] Only 'p' (pause), 'r' (resume) accepted]\n"); + } + } + }); + } + auto t_last = std::chrono::high_resolution_clock::now(); const auto t_start = t_last; @@ -246,6 +273,35 @@ int main(int argc, char ** argv) { break; } + if (params.pausable) { + int st = control_state.exchange(0); + if (st == 1 && !is_paused) { + audio.clear(); + audio.pause(); + + params.no_context = true; + + pcmf32.clear(); + pcmf32_new.clear(); + pcmf32_old.clear(); + prompt_tokens.clear(); + is_paused = true; + whisper_reset_timings(ctx); + } else if (st == 2 && is_paused) { + audio.resume(); + audio.clear(); + whisper_reset_timings(ctx); + is_paused = false; + t_last = std::chrono::high_resolution_clock::now(); + } + + if (is_paused) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + t_last = std::chrono::high_resolution_clock::now(); + continue; + } + } + // process new audio if (!use_vad) { From 0b372e24e825dd68ce12f4d3e4d5232aa6f4e0da Mon Sep 17 00:00:00 2001 From: Slavik Bogdanov Date: Mon, 21 Jul 2025 16:33:29 +0700 Subject: [PATCH 2/5] fix(stream): add missing short flag for pausable option --- examples/stream/stream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index b716a499bfd..78b3fca67ef 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -116,7 +116,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false"); fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU inference\n", params.use_gpu ? "false" : "true"); fprintf(stderr, " -fa, --flash-attn [%-7s] flash attention during inference\n", params.flash_attn ? "true" : "false"); - fprintf(stderr, " --pausable [%-7s] allow stdin commands p,n (PAUSE)/(RESUME)\n", params.pausable ? "true" : "false"); + fprintf(stderr, " -p, --pausable [%-7s] allow stdin commands p,n (PAUSE)/(RESUME)\n", params.pausable ? "true" : "false"); fprintf(stderr, "\n"); } From 337bca11082da27cfe3a333b722bd54b5fd009a1 Mon Sep 17 00:00:00 2001 From: Slavik Bogdanov Date: Tue, 22 Jul 2025 14:10:44 +0700 Subject: [PATCH 3/5] fix(stream): correct control flow and add control_thread join --- examples/stream/stream.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 78b3fca67ef..145f3ea0c39 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -276,8 +276,8 @@ int main(int argc, char ** argv) { if (params.pausable) { int st = control_state.exchange(0); if (st == 1 && !is_paused) { - audio.clear(); audio.pause(); + audio.clear(); params.no_context = true; @@ -288,8 +288,8 @@ int main(int argc, char ** argv) { is_paused = true; whisper_reset_timings(ctx); } else if (st == 2 && is_paused) { - audio.resume(); audio.clear(); + audio.resume(); whisper_reset_timings(ctx); is_paused = false; t_last = std::chrono::high_resolution_clock::now(); @@ -487,5 +487,7 @@ int main(int argc, char ** argv) { whisper_print_timings(ctx); whisper_free(ctx); + if (control_thread.joinable()) + control_thread.join(); return 0; } From fd4606fb842932a512a69363641fcc9b4658f608 Mon Sep 17 00:00:00 2001 From: Slavik Bogdanov Date: Tue, 22 Jul 2025 14:45:52 +0700 Subject: [PATCH 4/5] fix(stream): update stdin command key for resume --- examples/stream/stream.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 145f3ea0c39..1f932953015 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -116,7 +116,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false"); fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU inference\n", params.use_gpu ? "false" : "true"); fprintf(stderr, " -fa, --flash-attn [%-7s] flash attention during inference\n", params.flash_attn ? "true" : "false"); - fprintf(stderr, " -p, --pausable [%-7s] allow stdin commands p,n (PAUSE)/(RESUME)\n", params.pausable ? "true" : "false"); + fprintf(stderr, " -p, --pausable [%-7s] allow stdin commands p,r (PAUSE)/(RESUME)\n", params.pausable ? "true" : "false"); fprintf(stderr, "\n"); } @@ -489,5 +489,6 @@ int main(int argc, char ** argv) { if (control_thread.joinable()) control_thread.join(); + return 0; } From b0d429791319ff127b351a4d1fa7104ef3cf012a Mon Sep 17 00:00:00 2001 From: Slavik Bogdanov Date: Mon, 28 Jul 2025 22:23:55 +0700 Subject: [PATCH 5/5] fix(stream): correct audio pause/resume order --- examples/stream/stream.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/stream/stream.cpp b/examples/stream/stream.cpp index 1f932953015..6d496f39e91 100644 --- a/examples/stream/stream.cpp +++ b/examples/stream/stream.cpp @@ -276,8 +276,8 @@ int main(int argc, char ** argv) { if (params.pausable) { int st = control_state.exchange(0); if (st == 1 && !is_paused) { - audio.pause(); audio.clear(); + audio.pause(); params.no_context = true; @@ -288,8 +288,8 @@ int main(int argc, char ** argv) { is_paused = true; whisper_reset_timings(ctx); } else if (st == 2 && is_paused) { - audio.clear(); audio.resume(); + audio.clear(); whisper_reset_timings(ctx); is_paused = false; t_last = std::chrono::high_resolution_clock::now();