52
52
# Audio recording parameters
53
53
RATE = 16000
54
54
CHUNK = int (RATE / 10 ) # 100ms
55
+ FORMAT = pyaudio .paInt16
55
56
56
57
MODELDIR = "/home/pi/coderbot/psmodels/"
57
58
SOUNDDIR = "./sounds/"
@@ -67,21 +68,25 @@ def get_instance(cls):
67
68
return cls ._instance
68
69
69
70
def __init__ (self ):
70
- self .pyaudio = pyaudio .PyAudio ()
71
+ # self.pyaudio = pyaudio.PyAudio()
71
72
try :
72
- self .stream_in = self .pyaudio .open (format = FORMAT , channels = 1 , input_device_index = 2 , rate = RATE ,
73
- input = True ,
74
- frames_per_buffer = CHUNK_SIZE )
75
- self .stream_in .start_stream ()
76
- except :
73
+ #self.stream_in = self.pyaudio.open(format=FORMAT,
74
+ # channels=1, rate=RATE,
75
+ # input=True,
76
+ # frames_per_buffer=CHUNK)
77
+ #self.stream_in.start_stream()
78
+ self .stream_in = self .MicrophoneStream (FORMAT , RATE , CHUNK )
79
+ except Exception as e :
80
+ print e
77
81
logging .info ("Audio: input stream not available" )
78
82
79
83
self ._google_speech_client = speech .SpeechClient ()
80
84
81
85
def exit (self ):
86
+ pass
82
87
# cleanup stuff.
83
- self .stream_in .close ()
84
- self .pyaudio .terminate ()
88
+ # self.stream_in.close()
89
+ # self.pyaudio.terminate()
85
90
86
91
def say (self , what , locale = 'en' ):
87
92
if what and "$" in what :
@@ -92,50 +97,38 @@ def say(self, what, locale='en'):
92
97
def normalize (self , snd_data ):
93
98
"Average the volume out"
94
99
MAXIMUM = 16384
95
- #times = float(MAXIMUM) / audioop.rms(snd_data, 2)
96
100
times = float (MAXIMUM )/ max (abs (i ) for i in snd_data )
97
- logging . info ( "times: " + str (times ) )
101
+ print "times: " + str (times )
98
102
99
- r = array ('h' )
103
+ r = array ('h' , snd_data )
104
+ c = 0
100
105
for i in snd_data :
101
- r .append (int (i * times ))
106
+ r [c ] = int (i * times )
107
+ c += 1
102
108
return r
103
109
104
110
def record (self , elapse ):
105
111
num_silent = 0
106
112
snd_started = False
107
113
c = 0
108
114
109
- r = array ( 'h' )
115
+ r = bytearray ( )
110
116
111
117
t = time .time ()
112
- while time .time () - t < elapse :
113
- try :
114
- snd_data = array ('h' , self .stream_in .read (CHUNK_SIZE ))
115
- r .extend (snd_data )
116
- except IOError as ex :
117
- if ex [1 ] != pyaudio .paInputOverflowed :
118
- raise
119
- #buf = '\x00' * CHUNK_SIZE #white noise
120
- logging .info ("white noise" )
121
-
122
-
123
- logging .info ("read: " + str (len (r )) + " elapse: " + str (time .time () - t ))
124
-
125
-
126
- sample_width = self .pyaudio .get_sample_size (FORMAT )
118
+ with self .stream_in as stream :
119
+ audio_generator = stream .generator ()
120
+ for content in audio_generator :
121
+ r .extend (content )
122
+ if time .time () - t >= elapse :
123
+ return r
127
124
128
- r = self .normalize (r )
129
-
130
- return sample_width , r
131
-
132
125
def record_to_file (self , filename , elapse ):
133
- sample_width , data = self .record (elapse )
134
- data = pack ('<' + ('h' * len (data )), * data )
126
+ data = self .record (elapse )
127
+ # data = pack('<' + ('h'*len(data)), *data)
135
128
136
129
wf = wave .open (SOUNDDIR + filename , 'wb' )
137
130
wf .setnchannels (1 )
138
- wf .setsampwidth (sample_width )
131
+ wf .setsampwidth (self . pyaudio . get_sample_size ( FORMAT ) )
139
132
wf .setframerate (RATE )
140
133
wf .writeframes (data )
141
134
wf .close ()
@@ -155,44 +148,31 @@ def play(self, filename):
155
148
output = True)
156
149
157
150
# read data (based on the chunk size)
158
- data = wf.readframes(CHUNK_SIZE )
151
+ data = wf.readframes(CHUNK )
159
152
160
153
# play stream (looping from beginning of file to the end)
161
154
while data != '':
162
155
# writing to the stream is what *actually* plays the sound.
163
156
stream.write(data)
164
- data = wf.readframes(CHUNK_SIZE )
157
+ data = wf.readframes(CHUNK )
165
158
logging.info("play")
166
159
167
160
# cleanup stuff.
168
161
stream.close()
169
162
"""
170
163
171
164
def hear (self , level , elapse = 1.0 ):
172
- sig_hear = False
173
165
ts_total = time .time ()
174
- ts_signal = None
175
166
176
- while time .time () - ts_total < elapse :
177
- try :
178
- snd_data = self . stream_in . read ( CHUNK_SIZE )
179
- snd_rms = audioop . rms ( snd_data , 2 )
180
- logging . info ( "snd.rms: " + str ( snd_rms ) )
167
+ t = time .time ()
168
+ with self . stream_in as stream :
169
+ audio_generator = stream . generator ( )
170
+ for content in audio_generator :
171
+ snd_rms = audioop . rms ( content , 2 )
181
172
if snd_rms > level :
182
- sig_hear = True
183
- break
184
-
185
- except IOError as ex :
186
- if ex [1 ] != pyaudio .paInputOverflowed :
187
- raise
188
- buf = '\x00 ' * CHUNK_SIZE #white noise
189
- logging .info ("white noise" )
190
- except AttributeError :
191
- pass
192
-
193
-
194
- return sig_hear
195
-
173
+ return True
174
+ if time .time () - t >= elapse :
175
+ return False
196
176
197
177
def speech_recog (self , model ):
198
178
@@ -211,27 +191,18 @@ def speech_recog(self, model):
211
191
tstamp = time .time ()
212
192
recog_text = ''
213
193
214
- while len (recog_text ) < 1 :
215
- try :
216
- buf = self .stream_in .read (CHUNK_SIZE )
217
- logging .info ("actual voice" )
218
- decoder .process_raw (buf , False , False )
219
- if decoder .hyp ().hypstr != '' :
194
+ with self .stream_in as stream :
195
+ audio_generator = stream .generator ()
196
+ for content in audio_generator :
197
+ decoder .process_raw (content , False , False )
198
+ if decoder .hyp () and decoder .hyp ().hypstr != '' :
220
199
recog_text += decoder .hyp ().hypstr
221
200
print "text: " + decoder .hyp ().hypstr
222
201
tstamp = time .time ()
223
- except IOError as ex :
224
- if ex [1 ] != pyaudio .paInputOverflowed :
225
- raise
226
- buf = '\x00 ' * CHUNK_SIZE #white noise
227
- logging .info ("white noise" )
228
- except AttributeError :
229
- pass
230
-
231
- decoder .end_utt ()
232
-
233
- logging .info ("recog text: " + recog_text )
234
- return recog_text
202
+ if len (recog_text ) > 1 :
203
+ decoder .end_utt ()
204
+ logging .info ("recog text: " + recog_text )
205
+ return recog_text
235
206
236
207
def speech_recog_google (self , locale ):
237
208
config = types .RecognitionConfig (
@@ -243,75 +214,40 @@ def speech_recog_google(self, locale):
243
214
interim_results = False ,
244
215
single_utterance = True )
245
216
246
- with self .MicrophoneStream (RATE , CHUNK ) as stream :
217
+ t1 = time .time ()
218
+ with self .stream_in as stream :
247
219
audio_generator = stream .generator ()
248
220
requests = (types .StreamingRecognizeRequest (audio_content = content )
249
221
for content in audio_generator )
250
222
251
223
responses = self ._google_speech_client .streaming_recognize (streaming_config , requests )
252
224
253
225
# Now, put the transcription responses to use.
254
- #self.listen_print_loop(responses)
255
226
for response in responses :
227
+ if time .time () - t1 > 10 :
228
+ return ""
256
229
if response .results :
257
230
result = response .results [0 ]
258
231
if result .is_final :
259
232
return result .alternatives [0 ].transcript
260
233
261
- def listen_print_loop (self , responses ):
262
- for response in responses :
263
- if not response .results :
264
- continue
265
-
266
- # The `results` list is consecutive. For streaming, we only care about
267
- # the first result being considered, since once it's `is_final`, it
268
- # moves on to considering the next utterance.
269
- result = response .results [0 ]
270
- if not result .alternatives :
271
- continue
272
-
273
- # Display the transcription of the top alternative.
274
- transcript = result .alternatives [0 ].transcript
275
-
276
- # Display interim results, but with a carriage return at the end of the
277
- # line, so subsequent lines will overwrite them.
278
- #
279
- # If the previous result was longer than this one, we need to print
280
- # some extra spaces to overwrite the previous result
281
- overwrite_chars = ' ' * (num_chars_printed - len (transcript ))
282
-
283
- if not result .is_final :
284
- sys .stdout .write (transcript + overwrite_chars + '\r ' )
285
- sys .stdout .flush ()
286
-
287
- num_chars_printed = len (transcript )
288
-
289
- else :
290
- print (transcript + overwrite_chars )
291
-
292
- # Exit recognition if any of the transcribed phrases could be
293
- # one of our keywords.
294
- if re .search (r'\b(exit|quit)\b' , transcript , re .I ):
295
- print ('Exiting..' )
296
- break
297
-
298
- num_chars_printed = 0
299
-
300
-
301
234
class MicrophoneStream (object ):
302
235
"""Opens a recording stream as a generator yielding the audio chunks."""
303
- def __init__ (self , rate , chunk ):
236
+ def __init__ (self , fmt , rate , chunk ):
237
+ self ._audio_interface = None
238
+ self ._format = fmt
304
239
self ._rate = rate
305
240
self ._chunk = chunk
306
241
307
242
# Create a thread-safe buffer of audio data
308
- self ._buff = queue . Queue ()
243
+ self ._buff = None
309
244
self .closed = True
310
245
311
246
def __enter__ (self ):
312
247
self ._audio_interface = pyaudio .PyAudio ()
248
+ self ._buff = queue .Queue ()
313
249
self ._audio_stream = self ._audio_interface .open (
314
- format = pyaudio . paInt16 ,
250
+ format = self . _format ,
315
251
# The API currently only supports 1-channel (mono) audio
316
252
# https://goo.gl/z757pE
317
253
channels = 1 , rate = self ._rate ,
@@ -321,29 +257,20 @@ def __enter__(self):
321
257
# overflow while the calling thread makes network requests, etc.
322
258
stream_callback = self ._fill_buffer ,
323
259
)
324
-
325
260
self .closed = False
326
261
327
262
return self
328
263
329
264
def __exit__ (self , type , value , traceback ):
330
265
self ._audio_stream .stop_stream ()
331
266
self ._audio_stream .close ()
332
- self .closed = True
333
- # Signal the generator to terminate so that the client's
334
- # streaming_recognize method will not block the process termination.
335
- self ._buff .put (None )
336
267
self ._audio_interface .terminate ()
337
-
338
- def __exit__ (self , type , value , traceback ):
339
- self ._audio_stream .stop_stream ()
340
- self ._audio_stream .close ()
341
268
self .closed = True
342
- # Signal the generator to terminate so that the client's
343
- # streaming_recognize method will not block the process termination.
344
269
self ._buff .put (None )
345
- self ._audio_interface .terminate ()
346
270
271
+ def close (self ):
272
+ pass
273
+
347
274
def _fill_buffer (self , in_data , frame_count , time_info , status_flags ):
348
275
"""Continuously collect data from the audio stream, into the buffer."""
349
276
self ._buff .put (in_data )
0 commit comments