1616#include < vector>
1717#include < fstream>
1818
19+ #ifdef _WIN32
20+ #include < windows.h>
21+ #include < io.h>
22+ #else
23+ #include < fcntl.h>
24+ #include < unistd.h>
25+ #endif
26+
// Reconfigure stdin so that reads do not wait for input.
//
// Windows: clears ENABLE_LINE_INPUT and ENABLE_ECHO_INPUT in the console mode of the
//          standard input handle.
// Other:   sets O_NONBLOCK on the file descriptor behind stdin.
//
// Best effort: if the current mode/flags cannot be queried, stdin is left untouched
// instead of being reconfigured from an undefined or error value.
void setStdinNonBlocking() {
#ifdef _WIN32
    const HANDLE stdinHandle = GetStdHandle(STD_INPUT_HANDLE);
    if (stdinHandle == nullptr || stdinHandle == INVALID_HANDLE_VALUE) {
        return;
    }

    DWORD mode = 0;
    // when the query fails `mode` holds no meaningful value, so it must not be written back
    if (!GetConsoleMode(stdinHandle, &mode)) {
        return;
    }

    mode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT);
    SetConsoleMode(stdinHandle, mode);
#else
    const int fd    = fileno(stdin);
    const int flags = fcntl(fd, F_GETFL, 0);
    if (flags == -1) {
        // (-1 | O_NONBLOCK) is still -1, i.e. every bit set - never pass that to F_SETFL
        return;
    }

    fcntl(fd, F_SETFL, flags | O_NONBLOCK);
#endif
}
38+
// Restore the waiting (blocking) read behaviour of stdin.
//
// Windows: sets ENABLE_LINE_INPUT and ENABLE_ECHO_INPUT in the console mode of the
//          standard input handle.
// Other:   clears O_NONBLOCK on the file descriptor behind stdin.
//
// Best effort: if the current mode/flags cannot be queried, stdin is left untouched
// instead of being reconfigured from an undefined or error value.
void setStdinBlocking() {
#if defined(_WIN32)
    const HANDLE stdinHandle = GetStdHandle(STD_INPUT_HANDLE);
    if (stdinHandle == nullptr || stdinHandle == INVALID_HANDLE_VALUE) {
        return;
    }

    DWORD mode = 0;
    // when the query fails `mode` holds no meaningful value, so it must not be written back
    if (!GetConsoleMode(stdinHandle, &mode)) {
        return;
    }

    mode |= ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT;
    SetConsoleMode(stdinHandle, mode);
#else
    const int fd    = fileno(stdin);
    const int flags = fcntl(fd, F_GETFL, 0);
    if (flags == -1) {
        // (-1 & ~O_NONBLOCK) would request almost every status flag - never pass that to F_SETFL
        return;
    }

    fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
#endif
}
50+
1951
2052// command-line parameters
2153struct whisper_params {
@@ -143,12 +175,22 @@ int main(int argc, char ** argv) {
143175 // init audio
144176
145177 audio_async audio (params.length_ms );
146- if (!audio.init (params.capture_id , WHISPER_SAMPLE_RATE)) {
147- fprintf (stderr, " %s: audio.init() failed!\n " , __func__);
148- return 1 ;
149- }
178+ bool piped = !isatty (fileno (stdin));
179+
180+ if (piped) {
181+ #ifdef _WIN32
182+ _setmode (_fileno (stdin), _O_BINARY);
183+ #else
184+ freopen (NULL , " rb" , stdin);
185+ #endif
186+ } else {
187+ if (!audio.init (params.capture_id , WHISPER_SAMPLE_RATE)) {
188+ fprintf (stderr, " %s: audio.init() failed!\n " , __func__);
189+ return 1 ;
190+ }
150191
151- audio.resume ();
192+ audio.resume ();
193+ }
152194
153195 // whisper init
154196 if (params.language != " auto" && whisper_lang_id (params.language .c_str ()) == -1 ){
@@ -225,9 +267,43 @@ int main(int argc, char ** argv) {
225267
226268 wavWriter.open (filename, WHISPER_SAMPLE_RATE, 16 , 1 );
227269 }
270+
271+ // ignore premature stdin
272+ int n_mod = 0 ;
273+ if (piped) {
274+ const auto n_bytes_len = sizeof (float ) * n_samples_len;
275+ setStdinNonBlocking ();
276+ while (true ) {
277+ const auto n_bytes_read = read (fileno (stdin), pcmf32.data (), n_bytes_len);
278+ if (n_bytes_read == -1 && errno == EAGAIN) {
279+ break ;
280+ } else if (n_bytes_read < 1 ) {
281+ fprintf (stderr, " stdin ended too early\n " );
282+ is_running = false ;
283+ break ;
284+ }
285+ n_mod = n_bytes_read % sizeof (float );
286+ if (n_bytes_read < n_bytes_len) {
287+ break ;
288+ }
289+ }
290+ }
291+
228292 fprintf (stderr, " [Start speaking]\n " );
229293 fflush (stderr);
230294
295+ if (piped) {
296+ // ignore the partial sample
297+ if (n_mod > 0 ) {
298+ const auto n_remain = sizeof (float ) - n_mod;
299+ setStdinBlocking ();
300+ if (n_remain != fread (pcmf32.data (), 1 , n_remain, stdin)) {
301+ is_running = false ;
302+ }
303+ }
304+ setStdinNonBlocking ();
305+ }
306+
231307 auto t_last = std::chrono::high_resolution_clock::now ();
232308 auto t_interim = t_last;
233309 bool is_interim = false ;
@@ -250,6 +326,33 @@ int main(int argc, char ** argv) {
250326 // get new audio
251327 if (n_samples_new > n_samples_step) {
252328 pcmf32.clear ();
329+ } else if (piped) {
330+ pcmf32.resize (n_samples_len);
331+ char *p_buf = (char *)pcmf32.data ();
332+ const auto n_bytes_min = (n_samples_step - n_samples_new) * sizeof (float );
333+ auto n_bytes_wanted = n_samples_len * sizeof (float );
334+ auto n_bytes_read = 0 ;
335+ while (n_bytes_wanted > 0 ) {
336+ const auto n_read = read (fileno (stdin), p_buf + n_bytes_read, n_bytes_wanted);
337+ if (n_read == 0 || n_read == -1 && errno != EAGAIN) {
338+ fprintf (stderr, " read(stdin) returned %zd, errno = %d\n " , n_read, errno);
339+ is_running = false ;
340+ break ;
341+ }
342+ n_bytes_read += std::max (0L , n_read);
343+ if (n_bytes_read < n_bytes_min) {
344+ n_bytes_wanted = n_bytes_min - n_bytes_read;
345+ } else {
346+ n_bytes_wanted = n_bytes_read % sizeof (float );
347+ }
348+ if (n_bytes_wanted > 0 ) {
349+ std::this_thread::sleep_for (std::chrono::milliseconds (100 ));
350+ }
351+ }
352+ pcmf32.resize (n_bytes_read / sizeof (float ));
353+ if (!is_running) {
354+ break ;
355+ }
253356 } else if (t_diff < abs (params.step_ms )) {
254357 std::this_thread::sleep_for (std::chrono::milliseconds (abs (params.step_ms ) - t_diff));
255358 continue ;
@@ -308,7 +411,9 @@ int main(int argc, char ** argv) {
308411 } else {
309412 n_samples_new -= n_samples_100ms;
310413 n_samples_old = std::min (n_samples_len, n_samples_old + n_samples_100ms);
311- std::this_thread::sleep_for (std::chrono::milliseconds (100 ));
414+ if (!piped) {
415+ std::this_thread::sleep_for (std::chrono::milliseconds (100 ));
416+ }
312417 continue ;
313418 }
314419 }
0 commit comments