Skip to content

Commit 21c9b5d

Browse files
committed
[respeaker_ros] change frames_per_buffer for webrtcvad
1 parent 264a607 commit 21c9b5d

File tree

2 files changed

+319
-1
lines changed

2 files changed

+319
-1
lines changed

respeaker_ros/scripts/respeaker_node.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,7 @@ def __init__(self):
309309
self.speech_continuation = rospy.get_param("~speech_continuation", 0.5)
310310
self.speech_max_duration = rospy.get_param("~speech_max_duration", 7.0)
311311
self.speech_min_duration = rospy.get_param("~speech_min_duration", 0.1)
312+
self.sample_duration = rospy.get_param("~sample_duration", None) # sec
312313
suppress_pyaudio_error = rospy.get_param("~suppress_pyaudio_error", True)
313314
#
314315
self.respeaker = RespeakerInterface()
@@ -327,7 +328,8 @@ def __init__(self):
327328
self.config = None
328329
self.dyn_srv = Server(RespeakerConfig, self.on_config)
329330
# start
330-
self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error)
331+
self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error,
332+
sample_duration=self.sample_duration)
331333
self.speech_prefetch_bytes = int(
332334
self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0)
333335
self.speech_prefetch_buffer = b""
Lines changed: 316 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
import angles
2+
from contextlib import contextmanager
3+
import usb.core
4+
import usb.util
5+
import pyaudio
6+
import math
7+
import numpy as np
8+
import tf.transformations as T
9+
import os
10+
import rospy
11+
import struct
12+
import sys
13+
import time
14+
from audio_common_msgs.msg import AudioData
15+
from audio_common_msgs.msg import AudioInfo
16+
from geometry_msgs.msg import PoseStamped
17+
from std_msgs.msg import Bool, Int32, ColorRGBA
18+
from dynamic_reconfigure.server import Server
19+
20+
# https://stackoverflow.com/questions/21367320/searching-for-equivalent-of-filenotfounderror-in-python-2
21+
try:
22+
FileNotFoundError
23+
except NameError:
24+
FileNotFoundError = IOError
25+
try:
26+
from pixel_ring import usb_pixel_ring_v2
27+
except (IOError, FileNotFoundError) as e:
28+
rospy.logerr(e)
29+
raise RuntimeError("Check the device is connected and recognized")
30+
31+
try:
32+
from respeaker_ros.cfg import RespeakerConfig
33+
except Exception as e:
34+
rospy.logerr(e)
35+
raise RuntimeError("Need to run respeaker_gencfg.py first")
36+
37+
# suppress error messages from ALSA
38+
# https://stackoverflow.com/questions/7088672/pyaudio-working-but-spits-out-error-messages-each-time
39+
# https://stackoverflow.com/questions/36956083/how-can-the-terminal-output-of-executables-run-by-python-functions-be-silenced-i
40+
@contextmanager
41+
def ignore_stderr(enable=True):
42+
if enable:
43+
devnull = None
44+
try:
45+
devnull = os.open(os.devnull, os.O_WRONLY)
46+
stderr = os.dup(2)
47+
sys.stderr.flush()
48+
os.dup2(devnull, 2)
49+
try:
50+
yield
51+
finally:
52+
os.dup2(stderr, 2)
53+
os.close(stderr)
54+
finally:
55+
if devnull is not None:
56+
os.close(devnull)
57+
else:
58+
yield
59+
60+
# Partly copied from https://github.com/respeaker/usb_4_mic_array
61+
# parameter list
62+
# name: (id, offset, type, max, min , r/w, info)
63+
PARAMETERS = {
64+
'AECFREEZEONOFF': (18, 7, 'int', 1, 0, 'rw', 'Adaptive Echo Canceler updates inhibit.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
65+
'AECNORM': (18, 19, 'float', 16, 0.25, 'rw', 'Limit on norm of AEC filter coefficients'),
66+
'AECPATHCHANGE': (18, 25, 'int', 1, 0, 'ro', 'AEC Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
67+
'RT60': (18, 26, 'float', 0.9, 0.25, 'ro', 'Current RT60 estimate in seconds'),
68+
'HPFONOFF': (18, 27, 'int', 3, 0, 'rw', 'High-pass Filter on microphone signals.', '0 = OFF', '1 = ON - 70 Hz cut-off', '2 = ON - 125 Hz cut-off', '3 = ON - 180 Hz cut-off'),
69+
'RT60ONOFF': (18, 28, 'int', 1, 0, 'rw', 'RT60 Estimation for AES. 0 = OFF 1 = ON'),
70+
'AECSILENCELEVEL': (18, 30, 'float', 1, 1e-09, 'rw', 'Threshold for signal detection in AEC [-inf .. 0] dBov (Default: -80dBov = 10log10(1x10-8))'),
71+
'AECSILENCEMODE': (18, 31, 'int', 1, 0, 'ro', 'AEC far-end silence detection status. ', '0 = false (signal detected) ', '1 = true (silence detected)'),
72+
'AGCONOFF': (19, 0, 'int', 1, 0, 'rw', 'Automatic Gain Control. ', '0 = OFF ', '1 = ON'),
73+
'AGCMAXGAIN': (19, 1, 'float', 1000, 1, 'rw', 'Maximum AGC gain factor. ', '[0 .. 60] dB (default 30dB = 20log10(31.6))'),
74+
'AGCDESIREDLEVEL': (19, 2, 'float', 0.99, 1e-08, 'rw', 'Target power level of the output signal. ', '[-inf .. 0] dBov (default: -23dBov = 10log10(0.005))'),
75+
'AGCGAIN': (19, 3, 'float', 1000, 1, 'rw', 'Current AGC gain factor. ', '[0 .. 60] dB (default: 0.0dB = 20log10(1.0))'),
76+
'AGCTIME': (19, 4, 'float', 1, 0.1, 'rw', 'Ramps-up / down time-constant in seconds.'),
77+
'CNIONOFF': (19, 5, 'int', 1, 0, 'rw', 'Comfort Noise Insertion.', '0 = OFF', '1 = ON'),
78+
'FREEZEONOFF': (19, 6, 'int', 1, 0, 'rw', 'Adaptive beamformer updates.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
79+
'STATNOISEONOFF': (19, 8, 'int', 1, 0, 'rw', 'Stationary noise suppression.', '0 = OFF', '1 = ON'),
80+
'GAMMA_NS': (19, 9, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise. min .. max attenuation'),
81+
'MIN_NS': (19, 10, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
82+
'NONSTATNOISEONOFF': (19, 11, 'int', 1, 0, 'rw', 'Non-stationary noise suppression.', '0 = OFF', '1 = ON'),
83+
'GAMMA_NN': (19, 12, 'float', 3, 0, 'rw', 'Over-subtraction factor of non- stationary noise. min .. max attenuation'),
84+
'MIN_NN': (19, 13, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
85+
'ECHOONOFF': (19, 14, 'int', 1, 0, 'rw', 'Echo suppression.', '0 = OFF', '1 = ON'),
86+
'GAMMA_E': (19, 15, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (direct and early components). min .. max attenuation'),
87+
'GAMMA_ETAIL': (19, 16, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (tail components). min .. max attenuation'),
88+
'GAMMA_ENL': (19, 17, 'float', 5, 0, 'rw', 'Over-subtraction factor of non-linear echo. min .. max attenuation'),
89+
'NLATTENONOFF': (19, 18, 'int', 1, 0, 'rw', 'Non-Linear echo attenuation.', '0 = OFF', '1 = ON'),
90+
'NLAEC_MODE': (19, 20, 'int', 2, 0, 'rw', 'Non-Linear AEC training mode.', '0 = OFF', '1 = ON - phase 1', '2 = ON - phase 2'),
91+
'SPEECHDETECTED': (19, 22, 'int', 1, 0, 'ro', 'Speech detection status.', '0 = false (no speech detected)', '1 = true (speech detected)'),
92+
'FSBUPDATED': (19, 23, 'int', 1, 0, 'ro', 'FSB Update Decision.', '0 = false (FSB was not updated)', '1 = true (FSB was updated)'),
93+
'FSBPATHCHANGE': (19, 24, 'int', 1, 0, 'ro', 'FSB Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
94+
'TRANSIENTONOFF': (19, 29, 'int', 1, 0, 'rw', 'Transient echo suppression.', '0 = OFF', '1 = ON'),
95+
'VOICEACTIVITY': (19, 32, 'int', 1, 0, 'ro', 'VAD voice activity status.', '0 = false (no voice activity)', '1 = true (voice activity)'),
96+
'STATNOISEONOFF_SR': (19, 33, 'int', 1, 0, 'rw', 'Stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
97+
'NONSTATNOISEONOFF_SR': (19, 34, 'int', 1, 0, 'rw', 'Non-stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
98+
'GAMMA_NS_SR': (19, 35, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise for ASR. ', '[0.0 .. 3.0] (default: 1.0)'),
99+
'GAMMA_NN_SR': (19, 36, 'float', 3, 0, 'rw', 'Over-subtraction factor of non-stationary noise for ASR. ', '[0.0 .. 3.0] (default: 1.1)'),
100+
'MIN_NS_SR': (19, 37, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
101+
'MIN_NN_SR': (19, 38, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
102+
'GAMMAVAD_SR': (19, 39, 'float', 1000, 0, 'rw', 'Set the threshold for voice activity detection.', '[-inf .. 60] dB (default: 3.5dB 20log10(1.5))'),
103+
# 'KEYWORDDETECT': (20, 0, 'int', 1, 0, 'ro', 'Keyword detected. Current value so needs polling.'),
104+
'DOAANGLE': (21, 0, 'int', 359, 0, 'ro', 'DOA angle. Current value. Orientation depends on build configuration.')
105+
}
106+
107+
108+
class RespeakerInterface(object):
109+
VENDOR_ID = 0x2886
110+
PRODUCT_ID = 0x0018
111+
TIMEOUT = 100000
112+
113+
def __init__(self):
114+
self.dev = usb.core.find(idVendor=self.VENDOR_ID,
115+
idProduct=self.PRODUCT_ID)
116+
if not self.dev:
117+
raise RuntimeError("Failed to find Respeaker device")
118+
rospy.loginfo("Initializing Respeaker device")
119+
try:
120+
self.dev.reset()
121+
except usb.core.USBError:
122+
rospy.logerr(
123+
"You may have to give the right permission on respeaker device. "
124+
"Please run the command as followings to register udev rules.\n"
125+
"$ roscd respeaker_ros \n"
126+
"$ sudo cp -f $(rospack find respeaker_ros)/config/60-respeaker.rules /etc/udev/rules.d/60-respeaker.rules \n"
127+
"$ sudo systemctl restart udev \n"
128+
"You may find further details at https://github.com/jsk-ros-pkg/jsk_3rdparty/blob/master/respeaker_ros/README.md"
129+
) # NOQA
130+
raise
131+
self.pixel_ring = usb_pixel_ring_v2.PixelRing(self.dev)
132+
self.set_led_think()
133+
time.sleep(5) # it will take 5 seconds to re-recognize as audio device
134+
self.set_led_trace()
135+
rospy.loginfo("Respeaker device initialized (Version: %s)" % self.version)
136+
137+
def __del__(self):
138+
try:
139+
self.close()
140+
except:
141+
pass
142+
finally:
143+
self.dev = None
144+
145+
def write(self, name, value):
146+
try:
147+
data = PARAMETERS[name]
148+
except KeyError:
149+
return
150+
151+
if data[5] == 'ro':
152+
raise ValueError('{} is read-only'.format(name))
153+
154+
id = data[0]
155+
156+
# 4 bytes offset, 4 bytes value, 4 bytes type
157+
if data[2] == 'int':
158+
payload = struct.pack(b'iii', data[1], int(value), 1)
159+
else:
160+
payload = struct.pack(b'ifi', data[1], float(value), 0)
161+
162+
self.dev.ctrl_transfer(
163+
usb.util.CTRL_OUT | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
164+
0, 0, id, payload, self.TIMEOUT)
165+
166+
def read(self, name):
167+
try:
168+
data = PARAMETERS[name]
169+
except KeyError:
170+
return
171+
172+
id = data[0]
173+
174+
cmd = 0x80 | data[1]
175+
if data[2] == 'int':
176+
cmd |= 0x40
177+
178+
length = 8
179+
180+
try:
181+
response = self.dev.ctrl_transfer(
182+
usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
183+
0, cmd, id, length, self.TIMEOUT)
184+
except usb.core.USBError as e:
185+
rospy.logerr(e)
186+
rospy.signal_shutdown('Shutdown this node because of USBError')
187+
188+
if sys.version_info.major == 2:
189+
response = struct.unpack(b'ii', response.tostring())
190+
else:
191+
response = struct.unpack(b'ii', response.tobytes())
192+
193+
if data[2] == 'int':
194+
result = response[0]
195+
else:
196+
result = response[0] * (2.**response[1])
197+
198+
return result
199+
200+
def set_led_think(self):
201+
self.pixel_ring.set_brightness(10)
202+
self.pixel_ring.think()
203+
204+
def set_led_trace(self):
205+
self.pixel_ring.set_brightness(20)
206+
self.pixel_ring.trace()
207+
208+
def set_led_color(self, r, g, b, a):
209+
self.pixel_ring.set_brightness(int(20 * a))
210+
self.pixel_ring.set_color(r=int(r*255), g=int(g*255), b=int(b*255))
211+
212+
def set_vad_threshold(self, db):
213+
self.write('GAMMAVAD_SR', db)
214+
215+
def is_voice(self):
216+
return self.read('VOICEACTIVITY')
217+
218+
@property
219+
def direction(self):
220+
return self.read('DOAANGLE')
221+
222+
@property
223+
def version(self):
224+
return self.dev.ctrl_transfer(
225+
usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
226+
0, 0x80, 0, 1, self.TIMEOUT)[0]
227+
228+
def close(self):
229+
"""
230+
close the interface
231+
"""
232+
usb.util.dispose_resources(self.dev)
233+
234+
235+
class RespeakerAudio(object):
236+
def __init__(self, on_audio, channel=0, suppress_error=True, sample_duration=None):
237+
self.on_audio = on_audio
238+
with ignore_stderr(enable=suppress_error):
239+
self.pyaudio = pyaudio.PyAudio()
240+
self.channels = None
241+
self.channel = channel
242+
self.device_index = None
243+
self.rate = 16000
244+
self.bitwidth = 2
245+
self.bitdepth = 16
246+
if sample_duration is not None:
247+
frames_per_buffer = int(sample_duration * self.rate)
248+
else:
249+
frames_per_buffer = 1024
250+
251+
# find device
252+
count = self.pyaudio.get_device_count()
253+
rospy.logdebug("%d audio devices found" % count)
254+
for i in range(count):
255+
info = self.pyaudio.get_device_info_by_index(i)
256+
name = info["name"].encode("utf-8")
257+
chan = info["maxInputChannels"]
258+
rospy.logdebug(" - %d: %s" % (i, name))
259+
if name.lower().find(b"respeaker") >= 0:
260+
self.channels = chan
261+
self.device_index = i
262+
rospy.loginfo("Found %d: %s (channels: %d)" % (i, name, chan))
263+
break
264+
if self.device_index is None:
265+
rospy.logwarn("Failed to find respeaker device by name. Using default input")
266+
info = self.pyaudio.get_default_input_device_info()
267+
self.channels = info["maxInputChannels"]
268+
self.device_index = info["index"]
269+
270+
if self.channels != 6:
271+
rospy.logwarn("%d channel is found for respeaker" % self.channels)
272+
rospy.logwarn("You may have to update firmware.")
273+
self.channel = min(self.channels - 1, max(0, self.channel))
274+
275+
self.stream = self.pyaudio.open(
276+
input=True, start=False,
277+
format=pyaudio.paInt16,
278+
channels=self.channels,
279+
rate=self.rate,
280+
frames_per_buffer=frames_per_buffer,
281+
stream_callback=self.stream_callback,
282+
input_device_index=self.device_index,
283+
)
284+
285+
def __del__(self):
286+
self.stop()
287+
try:
288+
self.stream.close()
289+
except:
290+
pass
291+
finally:
292+
self.stream = None
293+
try:
294+
self.pyaudio.terminate()
295+
except:
296+
pass
297+
298+
def stream_callback(self, in_data, frame_count, time_info, status):
299+
# split channel
300+
data = np.frombuffer(in_data, dtype=np.int16)
301+
chunk_per_channel = int(len(data) / self.channels)
302+
data = np.reshape(data, (chunk_per_channel, self.channels))
303+
chan_data = data[:, self.channel]
304+
# invoke callback
305+
self.on_audio(chan_data.tobytes())
306+
return None, pyaudio.paContinue
307+
308+
def start(self):
309+
if self.stream.is_stopped():
310+
self.stream.start_stream()
311+
312+
def stop(self):
313+
if self.stream.is_active():
314+
self.stream.stop_stream()
315+
316+

0 commit comments

Comments
 (0)