Skip to content

Commit c6a8b58

Browse files
committed
wip #35
1 parent 6e07b90 commit c6a8b58

File tree

7 files changed

+180
-133
lines changed

7 files changed

+180
-133
lines changed

audio.py

Lines changed: 60 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@
5252
# Audio recording parameters
RATE = 16000                  # sample rate in Hz
CHUNK = int(RATE / 10)        # frames per buffer: 100 ms of audio
FORMAT = pyaudio.paInt16      # 16-bit signed samples

# presumably a pocketsphinx model directory (used by speech_recog) -- confirm
MODELDIR = "/home/pi/coderbot/psmodels/"
SOUNDDIR = "./sounds/"        # directory where sound files are recorded/read
@@ -67,21 +68,25 @@ def get_instance(cls):
6768
return cls._instance
6869

6970
def __init__(self):
    # Open the microphone through the lazy MicrophoneStream wrapper; the
    # actual PyAudio interface/stream are only created on __enter__.
    try:
        self.stream_in = self.MicrophoneStream(FORMAT, RATE, CHUNK)
    except Exception:
        # BUG FIX: the previous `print e` was Python-2-only syntax; log
        # the traceback instead of crashing on the error report itself.
        logging.exception("Audio: input stream not available")

    # Client used by speech_recog_google for streaming recognition.
    self._google_speech_client = speech.SpeechClient()
8185
def exit(self):
    """Release audio resources on shutdown.

    Currently a no-op: the PyAudio interface and stream are opened and
    closed per MicrophoneStream context, so nothing persistent is held
    here.  (Removed the dead `pass` + commented-out cleanup of
    self.stream_in / self.pyaudio, which __init__ no longer creates.)
    """
    pass
8590

8691
def say(self, what, locale='en'):
8792
if what and "$" in what:
@@ -92,50 +97,38 @@ def say(self, what, locale='en'):
9297
def normalize(self, snd_data):
    """Scale *snd_data* so its loudest sample reaches MAXIMUM.

    snd_data: a sequence of signed 16-bit sample values.
    Returns an array('h') of the scaled samples.
    """
    MAXIMUM = 16384
    samples = array('h', snd_data)
    if not samples:
        # BUG FIX: max() on an empty sequence raised ValueError.
        return samples
    peak = max(abs(i) for i in snd_data)
    if peak == 0:
        # BUG FIX: all-silence input divided by zero; nothing to scale.
        return samples
    times = float(MAXIMUM) / peak
    # BUG FIX: was a Python-2-only `print` statement; use logging like
    # the rest of the file.
    logging.info("times: " + str(times))
    for c, i in enumerate(snd_data):
        samples[c] = int(i * times)
    return samples
103109

104110
def record(self, elapse):
    """Capture raw microphone audio for up to *elapse* seconds.

    Returns a bytearray of raw frames; no normalization is applied.
    """
    frames = bytearray()
    started = time.time()
    with self.stream_in as stream:
        for chunk in stream.generator():
            frames.extend(chunk)
            if time.time() - started >= elapse:
                break
    # BUG FIX: the old code only returned from inside the loop; if the
    # audio generator ended before `elapse` the method fell through and
    # implicitly returned None.  Also dropped the unused num_silent /
    # snd_started / c locals.
    return frames
132125
def record_to_file(self, filename, elapse):
    """Record *elapse* seconds of audio and save it under SOUNDDIR as a
    mono 16-bit WAV file named *filename*."""
    data = self.record(elapse)

    wf = wave.open(SOUNDDIR + filename, 'wb')
    try:
        wf.setnchannels(1)
        # BUG FIX: __init__ no longer creates self.pyaudio, so the old
        # self.pyaudio.get_sample_size(FORMAT) raised AttributeError at
        # runtime; get_sample_size is also a module-level pyaudio helper.
        wf.setsampwidth(pyaudio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(data)
    finally:
        # Close even if a write fails, so the handle is released and the
        # WAV header is flushed.
        wf.close()
@@ -155,44 +148,31 @@ def play(self, filename):
155148
output = True)
156149
157150
# read data (based on the chunk size)
158-
data = wf.readframes(CHUNK_SIZE)
151+
data = wf.readframes(CHUNK)
159152
160153
# play stream (looping from beginning of file to the end)
161154
while data != '':
162155
# writing to the stream is what *actually* plays the sound.
163156
stream.write(data)
164-
data = wf.readframes(CHUNK_SIZE)
157+
data = wf.readframes(CHUNK)
165158
logging.info("play")
166159
167160
# cleanup stuff.
168161
stream.close()
169162
"""
170163

171164
def hear(self, level, elapse=1.0):
    """Listen for up to *elapse* seconds.

    Returns True as soon as one audio chunk's RMS loudness exceeds
    *level*, False once the time budget is spent.
    """
    deadline = time.time() + elapse
    with self.stream_in as stream:
        for chunk in stream.generator():
            # width=2 bytes per sample matches FORMAT (16-bit audio).
            if audioop.rms(chunk, 2) > level:
                return True
            if time.time() >= deadline:
                return False
    # BUG FIX: the old code implicitly returned None when the generator
    # was exhausted; report "nothing heard" explicitly.  Also dropped
    # the unused ts_total local.
    return False
196176

197177
def speech_recog(self, model):
198178

@@ -211,27 +191,18 @@ def speech_recog(self, model):
211191
tstamp = time.time()
212192
recog_text = ''
213193

214-
while len(recog_text) < 1:
215-
try:
216-
buf = self.stream_in.read(CHUNK_SIZE)
217-
logging.info("actual voice")
218-
decoder.process_raw(buf, False, False)
219-
if decoder.hyp().hypstr != '':
194+
with self.stream_in as stream:
195+
audio_generator = stream.generator()
196+
for content in audio_generator:
197+
decoder.process_raw(content, False, False)
198+
if decoder.hyp() and decoder.hyp().hypstr != '':
220199
recog_text += decoder.hyp().hypstr
221200
print "text: " + decoder.hyp().hypstr
222201
tstamp = time.time()
223-
except IOError as ex:
224-
if ex[1] != pyaudio.paInputOverflowed:
225-
raise
226-
buf = '\x00' * CHUNK_SIZE #white noise
227-
logging.info("white noise")
228-
except AttributeError:
229-
pass
230-
231-
decoder.end_utt()
232-
233-
logging.info("recog text: " + recog_text)
234-
return recog_text
202+
if len(recog_text) > 1:
203+
decoder.end_utt()
204+
logging.info("recog text: " + recog_text)
205+
return recog_text
235206

236207
def speech_recog_google(self, locale):
237208
config = types.RecognitionConfig(
@@ -243,75 +214,40 @@ def speech_recog_google(self, locale):
243214
interim_results=False,
244215
single_utterance=True)
245216

246-
with self.MicrophoneStream(RATE, CHUNK) as stream:
217+
t1 = time.time()
218+
with self.stream_in as stream:
247219
audio_generator = stream.generator()
248220
requests = (types.StreamingRecognizeRequest(audio_content=content)
249221
for content in audio_generator)
250222

251223
responses = self._google_speech_client.streaming_recognize(streaming_config, requests)
252224

253225
# Now, put the transcription responses to use.
254-
#self.listen_print_loop(responses)
255226
for response in responses:
227+
if time.time() - t1 > 10:
228+
return ""
256229
if response.results:
257230
result = response.results[0]
258231
if result.is_final:
259232
return result.alternatives[0].transcript
260233

261-
def listen_print_loop(self, responses):
262-
for response in responses:
263-
if not response.results:
264-
continue
265-
266-
# The `results` list is consecutive. For streaming, we only care about
267-
# the first result being considered, since once it's `is_final`, it
268-
# moves on to considering the next utterance.
269-
result = response.results[0]
270-
if not result.alternatives:
271-
continue
272-
273-
# Display the transcription of the top alternative.
274-
transcript = result.alternatives[0].transcript
275-
276-
# Display interim results, but with a carriage return at the end of the
277-
# line, so subsequent lines will overwrite them.
278-
#
279-
# If the previous result was longer than this one, we need to print
280-
# some extra spaces to overwrite the previous result
281-
overwrite_chars = ' ' * (num_chars_printed - len(transcript))
282-
283-
if not result.is_final:
284-
sys.stdout.write(transcript + overwrite_chars + '\r')
285-
sys.stdout.flush()
286-
287-
num_chars_printed = len(transcript)
288-
289-
else:
290-
print(transcript + overwrite_chars)
291-
292-
# Exit recognition if any of the transcribed phrases could be
293-
# one of our keywords.
294-
if re.search(r'\b(exit|quit)\b', transcript, re.I):
295-
print('Exiting..')
296-
break
297-
298-
num_chars_printed = 0
299-
300-
301234
class MicrophoneStream(object):
302235
"""Opens a recording stream as a generator yielding the audio chunks."""
303-
def __init__(self, fmt, rate, chunk):
    """Remember stream parameters; no audio resources are acquired here.

    fmt:   pyaudio sample format (e.g. pyaudio.paInt16)
    rate:  sample rate in Hz
    chunk: frames per buffer
    """
    self._audio_interface = None  # PyAudio instance, created in __enter__
    self._format = fmt
    self._rate = rate
    self._chunk = chunk
    # Thread-safe buffer of audio data.  Allocated fresh in __enter__ so
    # a drained/poisoned queue is never reused across enter/exit cycles.
    self._buff = None
    self.closed = True
310245

311246
def __enter__(self):
312247
self._audio_interface = pyaudio.PyAudio()
248+
self._buff = queue.Queue()
313249
self._audio_stream = self._audio_interface.open(
314-
format=pyaudio.paInt16,
250+
format=self._format,
315251
# The API currently only supports 1-channel (mono) audio
316252
# https://goo.gl/z757pE
317253
channels=1, rate=self._rate,
@@ -321,29 +257,20 @@ def __enter__(self):
321257
# overflow while the calling thread makes network requests, etc.
322258
stream_callback=self._fill_buffer,
323259
)
324-
325260
self.closed = False
326261

327262
return self
328263

329264
def __exit__(self, type, value, traceback):
    """Stop and release the stream, then unblock any pending consumer.

    BUG FIX: the original file defined __exit__ twice (the second
    definition silently shadowed the first), and the surviving order
    terminated the PyAudio interface before queuing the sentinel.  Queue
    the None sentinel first, so that a client blocked on the generator
    (e.g. inside streaming_recognize) wakes up and does not prevent
    process termination, then tear down the interface.
    """
    self._audio_stream.stop_stream()
    self._audio_stream.close()
    self.closed = True
    self._buff.put(None)
    self._audio_interface.terminate()
346270

271+
def close(self):
    """No-op kept for API compatibility: lifecycle is handled by the
    __enter__/__exit__ context manager (callers written against the old
    raw-stream API used to call stream_in.close())."""
    pass
273+
347274
def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
348275
"""Continuously collect data from the audio stream, into the buffer."""
349276
self._buff.put(in_data)

conversation.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# -*- coding:utf8 -*-
2+
# !/usr/bin/env python
3+
# Copyright 2017 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
import os.path
import sys
import random
import json

try:
    import apiai
except ImportError:
    # Fall back to an apiai SDK checkout in the parent directory.
    sys.path.append(
        os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)
    )
    import apiai

# SECURITY: an api.ai client token was hard-coded (and committed) here.
# Prefer the environment variable; the literal remains only as a fallback
# so existing deployments keep working.  The leaked token should be rotated.
CLIENT_ACCESS_TOKEN = os.environ.get(
    'APIAI_CLIENT_ACCESS_TOKEN',
    'a4c5990369cf4ce08b839abef0d2eac7',
)
31+
32+
33+
class Conversation:
    """Singleton wrapper around the api.ai (Dialogflow v1) text API."""

    _instance = None

    @classmethod
    def get_instance(cls):
        """Return the shared Conversation, creating it on first use."""
        if not cls._instance:
            cls._instance = Conversation()
        return cls._instance

    def __init__(self):
        self._ai = apiai.ApiAI(CLIENT_ACCESS_TOKEN)
        self._session_id = str(int(random.random() * 1000000000000))

    def get_action(self, query, locale):
        """Send *query* to api.ai in language *locale*.

        Returns a dict with the recognized "action", its "parameters",
        the active "contexts" and the fulfillment "response" text.
        """
        request = self._ai.text_request()
        request.lang = locale
        request.query = query

        data = json.load(request.getresponse())
        result = data["result"]
        return {
            "action": result["action"],
            "parameters": result["parameters"],
            "contexts": result["contexts"],
            "response": result["fulfillment"]["speech"],
        }

0 commit comments

Comments
 (0)