Skip to content

Commit 6e07b90

Browse files
committed
wip #35
1 parent e1d2adc commit 6e07b90

File tree

3 files changed

+161
-5
lines changed

3 files changed

+161
-5
lines changed

audio.py

Lines changed: 158 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,23 @@
3535
except:
3636
logging.info("pocketsphinx not available")
3737

38-
CHUNK_SIZE = 4096
39-
FORMAT = pyaudio.paInt16
40-
RATE = 44100
38+
## GOOGLE Speech API ##
39+
# [START import_libraries]
40+
#from __future__ import division
41+
42+
import re
43+
import sys
44+
45+
from google.cloud import speech
46+
from google.cloud.speech import enums
47+
from google.cloud.speech import types
48+
import pyaudio
49+
from six.moves import queue
50+
# [END import_libraries]
51+
52+
# Audio recording parameters
53+
RATE = 16000
54+
CHUNK = int(RATE / 10) # 100ms
4155

4256
MODELDIR = "/home/pi/coderbot/psmodels/"
4357
SOUNDDIR = "./sounds/"
@@ -62,6 +76,8 @@ def __init__(self):
6276
except:
6377
logging.info("Audio: input stream not available")
6478

79+
self._google_speech_client = speech.SpeechClient()
80+
6581
def exit(self):
6682
# cleanup stuff.
6783
self.stream_in.close()
@@ -217,3 +233,142 @@ def speech_recog(self, model):
217233
logging.info("recog text: " + recog_text)
218234
return recog_text
219235

236+
def speech_recog_google(self, locale):
    """Record one utterance from the microphone and return its transcript.

    Audio is captured through ``self.MicrophoneStream`` at ``RATE`` Hz in
    ``CHUNK``-frame buffers and streamed to the Google Speech client held in
    ``self._google_speech_client``.

    Args:
        locale: language code passed to the recognizer as ``language_code``.

    Returns:
        The transcript of the top alternative of the first final result, or
        ``None`` if the response stream ends without a usable final result.
    """
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=locale)
    # Only final results are requested, and recognition is limited to a
    # single utterance.
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=False,
        single_utterance=True)

    # NOTE(review): the source this was taken from had lost its indentation;
    # the response loop is assumed to run inside the `with` block so that the
    # microphone stays open while responses are consumed -- confirm.
    with self.MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = self._google_speech_client.streaming_recognize(
            streaming_config, requests)

        for response in responses:
            if not response.results:
                continue
            result = response.results[0]
            # A final result without alternatives carries no transcript;
            # skip it instead of failing with IndexError.
            if result.is_final and result.alternatives:
                return result.alternatives[0].transcript

    return None
260+
261+
def listen_print_loop(self, responses):
    """Print transcripts from a stream of recognition responses.

    Interim (non-final) transcripts are written to stdout followed by a
    carriage return so that the next write overwrites them; final
    transcripts are printed on their own line. The loop stops as soon as a
    final transcript contains the word "exit" or "quit" (case-insensitive).

    Args:
        responses: iterable of response objects exposing ``results``; each
            result exposes ``alternatives`` (with ``transcript``) and
            ``is_final``.
    """
    # Length of the interim transcript currently shown on the line. It must
    # be initialised before the loop: it is read on the very first response.
    num_chars_printed = 0

    for response in responses:
        if not response.results:
            continue

        # The `results` list is consecutive. For streaming, we only care about
        # the first result being considered, since once it's `is_final`, it
        # moves on to considering the next utterance.
        result = response.results[0]
        if not result.alternatives:
            continue

        # Display the transcription of the top alternative.
        transcript = result.alternatives[0].transcript

        # If the previous interim result was longer than this one, pad with
        # spaces so the leftover characters get overwritten. A negative
        # count yields an empty string.
        overwrite_chars = ' ' * (num_chars_printed - len(transcript))

        if not result.is_final:
            sys.stdout.write(transcript + overwrite_chars + '\r')
            sys.stdout.flush()

            num_chars_printed = len(transcript)

        else:
            print(transcript + overwrite_chars)

            # Exit recognition if any of the transcribed phrases could be
            # one of our keywords.
            if re.search(r'\b(exit|quit)\b', transcript, re.I):
                print('Exiting..')
                break

            num_chars_printed = 0
299+
300+
301+
class MicrophoneStream(object):
    """Opens a recording stream as a generator yielding the audio chunks.

    Used as a context manager: entering opens a mono, 16-bit PyAudio input
    stream whose callback pushes every captured buffer into an internal
    queue; ``generator()`` drains that queue; leaving closes the stream and
    pushes a ``None`` sentinel so a blocked generator terminates.
    """

    def __init__(self, rate, chunk):
        """Store the capture parameters; no audio device is opened yet.

        Args:
            rate: sample rate handed to PyAudio as ``rate``.
            chunk: buffer size handed to PyAudio as ``frames_per_buffer``.
        """
        self._rate = rate
        self._chunk = chunk

        # Create a thread-safe buffer of audio data
        self._buff = queue.Queue()
        self.closed = True

    def __enter__(self):
        """Open the PyAudio input stream and mark this object as open."""
        self._audio_interface = pyaudio.PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=pyaudio.paInt16,
            # The API currently only supports 1-channel (mono) audio
            # https://goo.gl/z757pE
            channels=1, rate=self._rate,
            input=True, frames_per_buffer=self._chunk,
            # Run the audio stream asynchronously to fill the buffer object.
            # This is necessary so that the input device's buffer doesn't
            # overflow while the calling thread makes network requests, etc.
            stream_callback=self._fill_buffer,
        )

        self.closed = False

        return self

    def __exit__(self, type, value, traceback):
        """Stop and close the audio stream and release PyAudio."""
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        # Signal the generator to terminate so that the client's
        # streaming_recognize method will not block the process termination.
        self._buff.put(None)
        self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """Continuously collect data from the audio stream, into the buffer."""
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def generator(self):
        """Yield captured audio as byte strings until the stream is closed.

        Each yielded item is the concatenation of every chunk that was
        queued at the time of the read. Iteration ends when ``closed`` is
        set or when the ``None`` sentinel is read from the queue; chunks
        collected in the same pass as the sentinel are not yielded.
        """
        while not self.closed:
            # Use a blocking get() to ensure there's at least one chunk of
            # data, and stop iteration if the chunk is None, indicating the
            # end of the audio stream.
            chunk = self._buff.get()
            if chunk is None:
                return
            data = [chunk]

            # Now consume whatever other data's still buffered.
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except queue.Empty:
                    break

            yield b''.join(data)
373+
# [END audio_stream]
374+

coderbot.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
{"move_tr_speed": "80", "move_fw_elapse": "1", "camera_color_object_size_min": "4000", "camera_path_object_size_min": "4000", "load_at_start": "", "move_tr_elapse": "0.5", "sound_stop": "$shutdown.mp3", "show_control_move_commands": "true", "prog_level": "adv", "prog_scrollbars": "true", "move_fw_speed": "100", "camera_color_object_size_max": "160000", "sound_shutter": "$shutter.mp3", "show_page_prefs": "true", "cv_image_factor": "4", "ctrl_hud_image": "", "button_func": "none", "ctrl_fw_elapse": "-1", "ctrl_tr_elapse": "-1", "move_power_angle_2": "60", "move_power_angle_3": "60", "move_power_angle_1": "45", "move_motor_trim": "1", "cnn_default_model": "apple_kiwi_tomato_fast", "show_page_program": "true", "sound_start": "$startup.mp3", "camera_exposure_mode": "auto", "ctrl_tr_speed": "80", "prog_move_mpu": "yes", "ctrl_fw_speed": "100", "camera_refresh_timeout": "0.1", "camera_jpeg_quality": "20", "prog_maxblocks": "-1", "move_motor_mode": "dc", "camera_path_object_size_max": "160000", "show_page_control": "true"}
1+
{"move_tr_speed": "80", "move_fw_elapse": "1", "camera_color_object_size_min": "4000", "camera_path_object_size_min": "4000", "load_at_start": "", "move_tr_elapse": "0.5", "sound_stop": "$shutdown.mp3", "show_control_move_commands": "true", "prog_level": "adv", "prog_scrollbars": "true", "move_fw_speed": "100", "camera_color_object_size_max": "160000", "sound_shutter": "$shutter.mp3", "show_page_prefs": "true", "cv_image_factor": "4", "ctrl_hud_image": "", "button_func": "none", "ctrl_fw_elapse": "-1", "ctrl_tr_elapse": "-1", "move_power_angle_2": "60", "move_power_angle_3": "60", "move_power_angle_1": "45", "move_motor_trim": "1", "cnn_default_model": "apple_kiwi_fast", "show_page_program": "true", "sound_start": "$startup.mp3", "camera_exposure_mode": "auto", "ctrl_tr_speed": "80", "prog_move_mpu": "yes", "ctrl_fw_speed": "100", "camera_refresh_timeout": "0.1", "camera_jpeg_quality": "20", "prog_maxblocks": "-1", "move_motor_mode": "dc", "camera_path_object_size_max": "160000", "show_page_control": "true"}

static/js/blockly/blocks.js

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -919,7 +919,8 @@ Blockly.Blocks['coderbot_audio_listen'] = {
919919
Blockly.Python['coderbot_audio_listen'] = function(block) {
  // Generate the Python expression for the "audio listen" block: a call to
  // the Google-backed recognizer on the shared audio object.
  // NOTE(review): the block's MODEL field is not used by this generator and
  // the locale is hard-coded to it_IT -- confirm whether the locale should
  // be derived from the block instead.
  var code = 'get_audio().speech_recog_google(locale="it_IT")';
  return [code, Blockly.Python.ORDER_ATOMIC];
};
925926

0 commit comments

Comments
 (0)