35
35
except :
36
36
logging .info ("pocketsphinx not available" )
37
37
38
- CHUNK_SIZE = 4096
39
- FORMAT = pyaudio .paInt16
40
- RATE = 44100
38
+ ## GOOGLE Speech API ##
39
+ # [START import_libraries]
40
+ #from __future__ import division
41
+
42
+ import re
43
+ import sys
44
+
45
+ from google .cloud import speech
46
+ from google .cloud .speech import enums
47
+ from google .cloud .speech import types
48
+ import pyaudio
49
+ from six .moves import queue
50
+ # [END import_libraries]
51
+
52
# Audio recording parameters
# Sample rate in Hz; used both for the microphone stream and as the
# sample_rate_hertz value of the recognition config.
RATE = 16000
# Frames per audio buffer: one tenth of a second of audio (100ms).
CHUNK = RATE // 10
41
55
42
56
MODELDIR = "/home/pi/coderbot/psmodels/"
43
57
SOUNDDIR = "./sounds/"
@@ -62,6 +76,8 @@ def __init__(self):
62
76
except :
63
77
logging .info ("Audio: input stream not available" )
64
78
79
+ self ._google_speech_client = speech .SpeechClient ()
80
+
65
81
def exit (self ):
66
82
# cleanup stuff.
67
83
self .stream_in .close ()
@@ -217,3 +233,142 @@ def speech_recog(self, model):
217
233
logging .info ("recog text: " + recog_text )
218
234
return recog_text
219
235
236
def speech_recog_google(self, locale):
    """Recognize one spoken utterance with the Google Cloud Speech API.

    Opens a ``MicrophoneStream`` at ``RATE``/``CHUNK``, streams its audio
    chunks to ``self._google_speech_client.streaming_recognize`` and waits
    for the first final result.

    Args:
        locale: value passed as ``language_code`` of the recognition config.

    Returns:
        The transcript of the top alternative of the first final result,
        or ``None`` if the response stream ends without a usable final
        result.
    """
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=locale)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=False,
        single_utterance=True)

    with self.MicrophoneStream(RATE, CHUNK) as stream:
        # Lazily wrap every audio chunk into a streaming request.
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in stream.generator())

        responses = self._google_speech_client.streaming_recognize(
            streaming_config, requests)

        for response in responses:
            if not response.results:
                continue
            # Only the first result of each response is considered.
            result = response.results[0]
            # Skip final results that carry no alternatives instead of
            # failing with an IndexError.
            if result.is_final and result.alternatives:
                return result.alternatives[0].transcript

    # The response stream ended without a usable final result.
    return None
260
+
261
def listen_print_loop(self, responses):
    """Print transcripts from a stream of recognition responses.

    Interim results are written to stdout followed by a carriage return so
    that the next write overwrites them; final results are printed on
    their own line. The loop stops once a final transcript contains the
    word "exit" or "quit" (case-insensitive), or when ``responses`` is
    exhausted.

    Args:
        responses: iterable of response objects, each exposing ``results``;
            a result exposes ``alternatives`` (with ``transcript``) and
            ``is_final``.
    """
    # Length of the interim transcript last written to the current line.
    # It must exist before the first iteration reads it; without this
    # initialisation the first response raised UnboundLocalError.
    num_chars_printed = 0

    for response in responses:
        if not response.results:
            continue

        # The `results` list is consecutive. For streaming, we only care about
        # the first result being considered, since once it's `is_final`, it
        # moves on to considering the next utterance.
        result = response.results[0]
        if not result.alternatives:
            continue

        # Display the transcription of the top alternative.
        transcript = result.alternatives[0].transcript

        # If the previous result was longer than this one, pad with spaces
        # so the leftover characters of the previous result are overwritten.
        # A negative count yields an empty string.
        overwrite_chars = ' ' * (num_chars_printed - len(transcript))

        if not result.is_final:
            sys.stdout.write(transcript + overwrite_chars + '\r ')
            sys.stdout.flush()

            num_chars_printed = len(transcript)

        else:
            print(transcript + overwrite_chars)

            # Exit recognition if any of the transcribed phrases could be
            # one of our keywords.
            if re.search(r'\b(exit|quit)\b', transcript, re.I):
                print('Exiting..')
                break

            num_chars_printed = 0
299
+
300
+
301
class MicrophoneStream(object):
    """Opens a recording stream as a generator yielding the audio chunks.

    Intended to be used as a context manager: entering opens a PyAudio
    input stream whose callback pushes raw audio into an internal queue,
    and ``generator()`` drains that queue. Leaving the context closes the
    stream and puts a ``None`` sentinel in the queue to end the generator.
    """

    def __init__(self, rate, chunk):
        """Store the stream parameters; no audio device is opened yet.

        Args:
            rate: sample rate handed to PyAudio as ``rate``.
            chunk: buffer size handed to PyAudio as ``frames_per_buffer``.
        """
        self._rate = rate
        self._chunk = chunk

        # Create a thread-safe buffer of audio data
        self._buff = queue.Queue()
        self.closed = True

    def __enter__(self):
        """Open the PyAudio input stream and mark this object as open."""
        self._audio_interface = pyaudio.PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=pyaudio.paInt16,
            # The API currently only supports 1-channel (mono) audio
            # https://goo.gl/z757pE
            channels=1, rate=self._rate,
            input=True, frames_per_buffer=self._chunk,
            # Run the audio stream asynchronously to fill the buffer object.
            # This is necessary so that the input device's buffer doesn't
            # overflow while the calling thread makes network requests, etc.
            stream_callback=self._fill_buffer,
        )

        self.closed = False

        return self

    def __exit__(self, type, value, traceback):
        """Stop and close the audio stream and end the generator."""
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        # Signal the generator to terminate so that the client's
        # streaming_recognize method will not block the process termination.
        self._buff.put(None)
        self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """Continuously collect data from the audio stream, into the buffer."""
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def generator(self):
        """Yield buffered audio, joining all currently queued chunks.

        Blocks until at least one chunk is available. Stops when the
        stream is marked closed or a ``None`` sentinel is read from the
        queue; chunks collected in the same pass as the sentinel are not
        yielded.
        """
        while not self.closed:
            # Use a blocking get() to ensure there's at least one chunk of
            # data, and stop iteration if the chunk is None, indicating the
            # end of the audio stream.
            chunk = self._buff.get()
            if chunk is None:
                return
            data = [chunk]

            # Now consume whatever other data's still buffered.
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except queue.Empty:
                    break

            yield b''.join(data)
373
+ # [END audio_stream]
374
+
0 commit comments