Skip to content

Commit 6302794

Browse files
committed
Simplify pipe handling
The previous pipe handling was too complicated for reviewers to accept and had a bug in aligning to sizeof(float). This commit reduces the number of lines. This time, `stream` doesn't skip the input that arrives before `[Start speaking]`, but that is usually not much of a problem because `read()` reads more than `step_ms` when possible.
1 parent 1027e4f commit 6302794

File tree

1 file changed

+13
-57
lines changed

1 file changed

+13
-57
lines changed

examples/stream/stream.cpp

Lines changed: 13 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -37,18 +37,6 @@ void setStdinNonBlocking() {
3737
#endif
3838
}
3939

40-
void setStdinBlocking() {
41-
#ifdef _WIN32
42-
DWORD mode;
43-
HANDLE stdinHandle = GetStdHandle(STD_INPUT_HANDLE);
44-
GetConsoleMode(stdinHandle, &mode);
45-
mode |= ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT;
46-
SetConsoleMode(stdinHandle, mode);
47-
#else
48-
fcntl(fileno(stdin), F_SETFL, fcntl(fileno(stdin), F_GETFL, 0) & ~O_NONBLOCK);
49-
#endif
50-
}
51-
5240

5341
// command-line parameters
5442
struct whisper_params {
@@ -74,7 +62,6 @@ struct whisper_params {
7462
bool flash_attn = false;
7563
bool interim = false;
7664
bool delete_vt100 = true;
77-
bool test_pipe = false;
7865

7966
std::string language = "en";
8067
std::string model = "models/ggml-base.en.bin";
@@ -114,7 +101,6 @@ static bool whisper_params_parse(int argc, char ** argv, whisper_params & params
114101
else if (arg == "-fa" || arg == "--flash-attn") { params.flash_attn = true; }
115102
else if (arg == "-int" || arg == "--interim") { params.interim = true; }
116103
else if (arg == "-nvt" || arg == "--no-vt100") { params.delete_vt100 = false; }
117-
else if ( arg == "--test-pipe") { params.test_pipe = true; }
118104

119105
else {
120106
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
@@ -155,7 +141,6 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
155141
fprintf(stderr, " -fa, --flash-attn [%-7s] flash attention during inference\n", params.flash_attn ? "true" : "false");
156142
fprintf(stderr, " -int, --interim [%-7s] show interim report in vad every step\n", params.interim ? "true" : "false");
157143
fprintf(stderr, " -nvt, --no-vt100 [%-7s] do not delete unconfirmed result\n", params.delete_vt100 ? "false" : "true");
158-
fprintf(stderr, " --test-pipe [%-7s] use all data from pipe\n", params.test_pipe ? "true" : "false");
159144
fprintf(stderr, "\n");
160145
}
161146

@@ -188,6 +173,7 @@ int main(int argc, char ** argv) {
188173
#else
189174
freopen(NULL, "rb", stdin);
190175
#endif
176+
setStdinNonBlocking();
191177
} else {
192178
if (!audio.init(params.capture_id, WHISPER_SAMPLE_RATE)) {
193179
fprintf(stderr, "%s: audio.init() failed!\n", __func__);
@@ -273,42 +259,9 @@ int main(int argc, char ** argv) {
273259
wavWriter.open(filename, WHISPER_SAMPLE_RATE, 16, 1);
274260
}
275261

276-
// ignore premature stdin
277-
int n_mod = 0;
278-
if (piped && !params.test_pipe) {
279-
const auto n_bytes_len = sizeof(float) * n_samples_len;
280-
setStdinNonBlocking();
281-
while (true) {
282-
const auto n_bytes_read = read(fileno(stdin), pcmf32.data(), n_bytes_len);
283-
if (n_bytes_read == -1 && errno == EAGAIN) {
284-
break;
285-
} else if (n_bytes_read < 1) {
286-
fprintf(stderr, "stdin ended too early\n");
287-
is_running = false;
288-
break;
289-
}
290-
n_mod = n_bytes_read % sizeof(float);
291-
if (n_bytes_read < n_bytes_len) {
292-
break;
293-
}
294-
}
295-
}
296-
297262
fprintf(stderr, "[Start speaking]\n");
298263
fflush(stderr);
299264

300-
if (piped) {
301-
// ignore the partial sample
302-
if (n_mod > 0) {
303-
const auto n_remain = sizeof(float) - n_mod;
304-
setStdinBlocking();
305-
if (n_remain != fread(pcmf32.data(), 1, n_remain, stdin)) {
306-
is_running = false;
307-
}
308-
}
309-
setStdinNonBlocking();
310-
}
311-
312265
auto t_last = std::chrono::high_resolution_clock::now();
313266
auto t_interim = t_last;
314267
bool is_interim = false;
@@ -332,12 +285,15 @@ int main(int argc, char ** argv) {
332285
if (n_samples_new > n_samples_step) {
333286
pcmf32.clear();
334287
} else if (piped) {
335-
pcmf32.resize(n_samples_len);
336-
char *p_buf = (char *)pcmf32.data();
288+
// need at least step_ms
337289
const auto n_bytes_min = (n_samples_step - n_samples_new) * sizeof(float);
290+
// but try to get length_ms at first
338291
auto n_bytes_wanted = n_samples_len * sizeof(float);
292+
pcmf32.resize(n_samples_len);
293+
339294
auto n_bytes_read = 0;
340295
while (n_bytes_wanted > 0) {
296+
char *p_buf = (char *)pcmf32.data();
341297
const auto n_read = read(fileno(stdin), p_buf + n_bytes_read, n_bytes_wanted);
342298
if (n_read == 0 || n_read == -1 && errno != EAGAIN) {
343299
fprintf(stderr, "read(stdin) returned %zd, errno = %d\n", n_read, errno);
@@ -348,11 +304,11 @@ int main(int argc, char ** argv) {
348304
if (n_bytes_read < n_bytes_min) {
349305
n_bytes_wanted = n_bytes_min - n_bytes_read;
350306
} else {
351-
n_bytes_wanted = n_bytes_read % sizeof(float);
352-
}
353-
if (n_bytes_wanted > 0) {
354-
std::this_thread::sleep_for(std::chrono::milliseconds(100));
307+
const auto n_mod = n_bytes_read % sizeof(float);
308+
n_bytes_wanted = (n_mod != 0) ? sizeof(float) - n_mod : 0;
355309
}
310+
const auto est_ms = 1000 * n_bytes_wanted / sizeof(float) / WHISPER_SAMPLE_RATE;
311+
std::this_thread::sleep_for(std::chrono::milliseconds(est_ms));
356312
}
357313
pcmf32.resize(n_bytes_read / sizeof(float));
358314
} else if (t_diff < abs(params.step_ms)) {
@@ -374,7 +330,7 @@ int main(int argc, char ** argv) {
374330
}
375331

376332
n_samples_new += n_samples_buf;
377-
if (!use_vad && n_samples_new > 2*n_samples_step) {
333+
if (!use_vad && !piped && n_samples_new > 2*n_samples_step) {
378334
fprintf(stderr, "\n\n%s: WARNING: cannot process audio fast enough, dropping audio ...\n", __func__);
379335
fprintf(stderr, "t_diff = %.2fs, new = %.2fs, buf = %.2fs\n\n", 1e-3*t_diff, float(n_samples_new)/WHISPER_SAMPLE_RATE, float(n_samples_buf)/WHISPER_SAMPLE_RATE);
380336
n_samples_old = 0;
@@ -513,10 +469,10 @@ int main(int argc, char ** argv) {
513469
text << std::endl;
514470
}
515471
if (is_interim) {
516-
// utf-8 cannot be simply cut into two
472+
// utf-8 cannot be simply cut
517473
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> conv;
518474
const auto t_u32 = conv.from_bytes(i_text);
519-
const auto t_sub = conv.to_bytes(t_u32.substr(0, t_u32.size() * 0.7));
475+
const auto t_sub = conv.to_bytes(t_u32.substr(0, t_u32.size() * 0.9));
520476
i_text = t_sub + "";
521477
}
522478
if (s_to_delete.size() > 0) {

0 commit comments

Comments
 (0)