Skip to content

Commit b114ec3

Browse files
committed
Accept pipe to stream
Now it is easy to test with raw PCM data. Try `cat pcmf32.raw | stream` (or `pv -qL 64000 pcmf32.raw | stream` to feed it in real time).

Note: I haven't tested the WIN32 ifdefs.

You can create such data with `ffmpeg -i jfk.wav -f f32le -acodec pcm_f32le jfk.raw`, because the WAV header length (44) is a multiple of `sizeof(float)` (4).

I decided to ignore the data that arrives before `[Start speaking]`, because such premature data is not good for remote-transcription systems like:

```
mic2pcm | ssh -C remote "stream | lines2googledocs"
```

or

```
mic2some | ssh -C remote "ffmpeg -loglevel fatal -i pipe:0 -tune zerolatency -af atempo=1.1 -f f32le -ar 16000 -acodec pcm_f32le pipe:1 | stream"
```

So if you want to do a strict test, remove the "ignore" part. Otherwise quite a number of bytes will be ignored.
1 parent b27fc1f commit b114ec3

File tree

1 file changed

+111
-6
lines changed

1 file changed

+111
-6
lines changed

examples/stream/stream.cpp

Lines changed: 111 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,38 @@
1616
#include <vector>
1717
#include <fstream>
1818

19+
#ifdef _WIN32
20+
#include <windows.h>
21+
#include <io.h>
22+
#else
23+
#include <fcntl.h>
24+
#include <unistd.h>
25+
#endif
26+
27+
// Puts stdin into the mode used for polling piped input.
//
// POSIX:   adds O_NONBLOCK to the file status flags of stdin via fcntl(), so a
//          read() with no data available fails with EAGAIN instead of waiting.
// Windows: clears ENABLE_LINE_INPUT and ENABLE_ECHO_INPUT on the stdin console
//          handle.
//          NOTE(review): the commit message says the WIN32 path is untested;
//          presumably clearing these console flags does not make reads on a
//          pipe non-blocking -- confirm before relying on it.
//
// Best-effort: returns nothing; on failure the current mode is left untouched.
void setStdinNonBlocking() {
#ifdef _WIN32
    HANDLE stdinHandle = GetStdHandle(STD_INPUT_HANDLE);
    DWORD mode = 0;
    // GetConsoleMode() fails when the handle is not a console input buffer.
    // Without this check an uninitialized `mode` would be handed to
    // SetConsoleMode().
    if (stdinHandle == INVALID_HANDLE_VALUE || !GetConsoleMode(stdinHandle, &mode)) {
        return;
    }
    mode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT);
    SetConsoleMode(stdinHandle, mode);
#else
    const int fd = fileno(stdin);
    const int flags = fcntl(fd, F_GETFL, 0);
    if (flags == -1) {
        // Do not feed the error value back into F_SETFL: (-1 | O_NONBLOCK) is
        // still -1, i.e. every flag bit set.
        perror("setStdinNonBlocking: fcntl(F_GETFL)");
        return;
    }
    if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
        perror("setStdinNonBlocking: fcntl(F_SETFL)");
    }
#endif
}
38+
39+
// Restores stdin to ordinary blocking reads.
//
// POSIX:   removes O_NONBLOCK from the file status flags of stdin via fcntl().
// Windows: sets ENABLE_LINE_INPUT and ENABLE_ECHO_INPUT on the stdin console
//          handle.
//          NOTE(review): the commit message says the WIN32 path is untested;
//          presumably these console flags have no effect when stdin is a
//          pipe -- confirm before relying on it.
//
// Best-effort: returns nothing; on failure the current mode is left untouched.
void setStdinBlocking() {
#if defined(_WIN32)
    HANDLE stdinHandle = GetStdHandle(STD_INPUT_HANDLE);
    DWORD mode = 0;
    // GetConsoleMode() fails when the handle is not a console input buffer.
    // Without this check an uninitialized `mode` would be handed to
    // SetConsoleMode().
    if (stdinHandle == INVALID_HANDLE_VALUE || !GetConsoleMode(stdinHandle, &mode)) {
        return;
    }
    mode |= ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT;
    SetConsoleMode(stdinHandle, mode);
#else
    const int fd = fileno(stdin);
    const int flags = fcntl(fd, F_GETFL, 0);
    if (flags == -1) {
        // Do not feed the error value back into F_SETFL: (-1 & ~O_NONBLOCK)
        // would request every other flag bit at once.
        perror("setStdinBlocking: fcntl(F_GETFL)");
        return;
    }
    if (fcntl(fd, F_SETFL, flags & ~O_NONBLOCK) == -1) {
        perror("setStdinBlocking: fcntl(F_SETFL)");
    }
#endif
}
50+
1951

2052
// command-line parameters
2153
struct whisper_params {
@@ -143,12 +175,22 @@ int main(int argc, char ** argv) {
143175
// init audio
144176

145177
audio_async audio(params.length_ms);
146-
if (!audio.init(params.capture_id, WHISPER_SAMPLE_RATE)) {
147-
fprintf(stderr, "%s: audio.init() failed!\n", __func__);
148-
return 1;
149-
}
178+
bool piped = !isatty(fileno(stdin));
179+
180+
if (piped) {
181+
#ifdef _WIN32
182+
_setmode(_fileno(stdin), _O_BINARY);
183+
#else
184+
freopen(NULL, "rb", stdin);
185+
#endif
186+
} else {
187+
if (!audio.init(params.capture_id, WHISPER_SAMPLE_RATE)) {
188+
fprintf(stderr, "%s: audio.init() failed!\n", __func__);
189+
return 1;
190+
}
150191

151-
audio.resume();
192+
audio.resume();
193+
}
152194

153195
// whisper init
154196
if (params.language != "auto" && whisper_lang_id(params.language.c_str()) == -1){
@@ -225,9 +267,43 @@ int main(int argc, char ** argv) {
225267

226268
wavWriter.open(filename, WHISPER_SAMPLE_RATE, 16, 1);
227269
}
270+
271+
// ignore premature stdin
272+
int n_mod = 0;
273+
if (piped) {
274+
const auto n_bytes_len = sizeof(float) * n_samples_len;
275+
setStdinNonBlocking();
276+
while (true) {
277+
const auto n_bytes_read = read(fileno(stdin), pcmf32.data(), n_bytes_len);
278+
if (n_bytes_read == -1 && errno == EAGAIN) {
279+
break;
280+
} else if (n_bytes_read < 1) {
281+
fprintf(stderr, "stdin ended too early\n");
282+
is_running = false;
283+
break;
284+
}
285+
n_mod = n_bytes_read % sizeof(float);
286+
if (n_bytes_read < n_bytes_len) {
287+
break;
288+
}
289+
}
290+
}
291+
228292
fprintf(stderr, "[Start speaking]\n");
229293
fflush(stderr);
230294

295+
if (piped) {
296+
// ignore the partial sample
297+
if (n_mod > 0) {
298+
const auto n_remain = sizeof(float) - n_mod;
299+
setStdinBlocking();
300+
if (n_remain != fread(pcmf32.data(), 1, n_remain, stdin)) {
301+
is_running = false;
302+
}
303+
}
304+
setStdinNonBlocking();
305+
}
306+
231307
auto t_last = std::chrono::high_resolution_clock::now();
232308
auto t_interim = t_last;
233309
bool is_interim = false;
@@ -250,6 +326,33 @@ int main(int argc, char ** argv) {
250326
// get new audio
251327
if (n_samples_new > n_samples_step) {
252328
pcmf32.clear();
329+
} else if (piped) {
330+
pcmf32.resize(n_samples_len);
331+
char *p_buf = (char *)pcmf32.data();
332+
const auto n_bytes_min = (n_samples_step - n_samples_new) * sizeof(float);
333+
auto n_bytes_wanted = n_samples_len * sizeof(float);
334+
auto n_bytes_read = 0;
335+
while (n_bytes_wanted > 0) {
336+
const auto n_read = read(fileno(stdin), p_buf + n_bytes_read, n_bytes_wanted);
337+
if (n_read == 0 || n_read == -1 && errno != EAGAIN) {
338+
fprintf(stderr, "read(stdin) returned %zd, errno = %d\n", n_read, errno);
339+
is_running = false;
340+
break;
341+
}
342+
n_bytes_read += std::max(0L, n_read);
343+
if (n_bytes_read < n_bytes_min) {
344+
n_bytes_wanted = n_bytes_min - n_bytes_read;
345+
} else {
346+
n_bytes_wanted = n_bytes_read % sizeof(float);
347+
}
348+
if (n_bytes_wanted > 0) {
349+
std::this_thread::sleep_for(std::chrono::milliseconds(100));
350+
}
351+
}
352+
pcmf32.resize(n_bytes_read / sizeof(float));
353+
if (!is_running) {
354+
break;
355+
}
253356
} else if (t_diff < abs(params.step_ms)) {
254357
std::this_thread::sleep_for(std::chrono::milliseconds(abs(params.step_ms) - t_diff));
255358
continue;
@@ -308,7 +411,9 @@ int main(int argc, char ** argv) {
308411
} else {
309412
n_samples_new -= n_samples_100ms;
310413
n_samples_old = std::min(n_samples_len, n_samples_old + n_samples_100ms);
311-
std::this_thread::sleep_for(std::chrono::milliseconds(100));
414+
if (!piped) {
415+
std::this_thread::sleep_for(std::chrono::milliseconds(100));
416+
}
312417
continue;
313418
}
314419
}

0 commit comments

Comments
 (0)