Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 61 additions & 3 deletions examples/stream/stream.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ struct whisper_params {
bool save_audio = false; // save audio to wav file
bool use_gpu = true;
bool flash_attn = true;
int file_mode = 0; // 0=raw, 1=newline

std::string language = "en";
std::string model = "models/ggml-base.en.bin";
Expand All @@ -45,6 +46,23 @@ struct whisper_params {

void whisper_print_usage(int argc, char ** argv, const whisper_params & params);

// no helpers needed for file write modes when using raw/newline only

static int file_mode_from_string(const std::string & mode) {
if (mode == "raw") return 0;
if (mode == "newline") return 1;
fprintf(stderr, "error: unknown --file-mode '%s' (expected: raw|newline)\n", mode.c_str());
exit(1);
}

static const char * file_mode_to_cstr(int mode) {
switch (mode) {
case 0: return "raw";
case 1: return "newline";
default: return "raw";
}
}

static bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
Expand All @@ -70,6 +88,7 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
else if (arg == "-l" || arg == "--language") { params.language = argv[++i]; }
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else if (arg == "-f" || arg == "--file") { params.fname_out = argv[++i]; }
else if ( arg == "--file-mode") { params.file_mode = file_mode_from_string(argv[++i]); }
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
else if (arg == "-sa" || arg == "--save-audio") { params.save_audio = true; }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
Expand Down Expand Up @@ -109,6 +128,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " -l LANG, --language LANG [%-7s] spoken language\n", params.language.c_str());
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] text output file name\n", params.fname_out.c_str());
fprintf(stderr, " --file-mode MODE [%-7s] file write mode: raw|newline (default raw)\n", file_mode_to_cstr(params.file_mode));
fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
fprintf(stderr, " -sa, --save-audio [%-7s] save the recorded audio to a file\n", params.save_audio ? "true" : "false");
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU inference\n", params.use_gpu ? "false" : "true");
Expand Down Expand Up @@ -211,6 +231,7 @@ int main(int argc, char ** argv) {
bool is_running = true;

std::ofstream fout;
std::string file_buffer;
if (params.fname_out.length() > 0) {
fout.open(params.fname_out);
if (!fout.is_open()) {
Expand All @@ -231,6 +252,11 @@ int main(int argc, char ** argv) {
wavWriter.open(filename, WHISPER_SAMPLE_RATE, 16, 1);
}
printf("[Start speaking]\n");
if (params.fname_out.length() > 0) {
// Mirror the start signal into the file for parity with stdout
fout << "[Start speaking]\n";
fout.flush();
}
fflush(stdout);

auto t_last = std::chrono::high_resolution_clock::now();
Expand Down Expand Up @@ -345,6 +371,10 @@ int main(int argc, char ** argv) {

// print result;
{
if (params.fname_out.length() > 0 && (params.file_mode != 0)) {
file_buffer.clear();
}

if (!use_vad) {
printf("\33[2K\r");

Expand All @@ -370,7 +400,11 @@ int main(int argc, char ** argv) {
fflush(stdout);

if (params.fname_out.length() > 0) {
fout << text;
if (params.file_mode != 0) {
file_buffer += text;
} else {
fout << text;
}
}
} else {
const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
Expand All @@ -388,13 +422,21 @@ int main(int argc, char ** argv) {
fflush(stdout);

if (params.fname_out.length() > 0) {
fout << output;
if (params.file_mode != 0) {
file_buffer += output;
} else {
fout << output;
}
}
}
}

if (params.fname_out.length() > 0) {
fout << std::endl;
if (params.file_mode != 0) {
file_buffer.push_back('\n');
} else {
fout << std::endl;
}
}

if (use_vad) {
Expand All @@ -405,6 +447,16 @@ int main(int argc, char ** argv) {

++n_iter;

if ((params.file_mode != 0) && params.fname_out.length() > 0) {
if (use_vad || (!use_vad && (n_iter % n_new_line) == 0)) {
if (!file_buffer.empty()) {
fout << file_buffer;
fout.flush();
file_buffer.clear();
}
}
}

if (!use_vad && (n_iter % n_new_line) == 0) {
printf("\n");

Expand All @@ -430,6 +482,12 @@ int main(int argc, char ** argv) {

audio.pause();

if ((params.file_mode != 0) && params.fname_out.length() > 0 && !file_buffer.empty()) {
fout << file_buffer;
fout.flush();
file_buffer.clear();
}

whisper_print_timings(ctx);
whisper_free(ctx);

Expand Down