[respeaker_ros] change frames_per_buffer for webrtcvad

sawada10 · sawada10 · commit 21c9b5df9102 · 2025-10-22T22:42:32.000+09:00
diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py
@@ -309,6 +309,7 @@ def __init__(self):
         self.speech_continuation = rospy.get_param("~speech_continuation", 0.5)
         self.speech_max_duration = rospy.get_param("~speech_max_duration", 7.0)
         self.speech_min_duration = rospy.get_param("~speech_min_duration", 0.1)
+        self.sample_duration = rospy.get_param("~sample_duration", None)  # sec
         suppress_pyaudio_error = rospy.get_param("~suppress_pyaudio_error", True)
         #
         self.respeaker = RespeakerInterface()
@@ -327,7 +328,8 @@ def __init__(self):
         self.config = None
         self.dyn_srv = Server(RespeakerConfig, self.on_config)
         # start
-        self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error)
+        self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error,
+                                              sample_duration=self.sample_duration)
         self.speech_prefetch_bytes = int(
             self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0)
         self.speech_prefetch_buffer = b""
diff --git a/respeaker_ros/src/respeaker_ros/__init__.py b/respeaker_ros/src/respeaker_ros/__init__.py
@@ -0,0 +1,316 @@
+import angles
+from contextlib import contextmanager
+import usb.core
+import usb.util
+import pyaudio
+import math
+import numpy as np
+import tf.transformations as T
+import os
+import rospy
+import struct
+import sys
+import time
+from audio_common_msgs.msg import AudioData
+from audio_common_msgs.msg import AudioInfo
+from geometry_msgs.msg import PoseStamped
+from std_msgs.msg import Bool, Int32, ColorRGBA
+from dynamic_reconfigure.server import Server
+
+# https://stackoverflow.com/questions/21367320/searching-for-equivalent-of-filenotfounderror-in-python-2
+try:
+    FileNotFoundError
+except NameError:
+    FileNotFoundError = IOError
+try:
+    from pixel_ring import usb_pixel_ring_v2
+except (IOError, FileNotFoundError) as e:
+    rospy.logerr(e)
+    raise RuntimeError("Check the device is connected and recognized")
+
+try:
+    from respeaker_ros.cfg import RespeakerConfig
+except Exception as e:
+    rospy.logerr(e)
+    raise RuntimeError("Need to run respeaker_gencfg.py first")
+
+# suppress error messages from ALSA
+# https://stackoverflow.com/questions/7088672/pyaudio-working-but-spits-out-error-messages-each-time
+# https://stackoverflow.com/questions/36956083/how-can-the-terminal-output-of-executables-run-by-python-functions-be-silenced-i
+@contextmanager
+def ignore_stderr(enable=True):
+    if enable:
+        devnull = None
+        try:
+            devnull = os.open(os.devnull, os.O_WRONLY)
+            stderr = os.dup(2)
+            sys.stderr.flush()
+            os.dup2(devnull, 2)
+            try:
+                yield
+            finally:
+                os.dup2(stderr, 2)
+                os.close(stderr)
+        finally:
+            if devnull is not None:
+                os.close(devnull)
+    else:
+        yield
+
+# Partly copied from https://github.com/respeaker/usb_4_mic_array
+# parameter list
+# name: (id, offset, type, max, min , r/w, info)
+PARAMETERS = {
+    'AECFREEZEONOFF': (18, 7, 'int', 1, 0, 'rw', 'Adaptive Echo Canceler updates inhibit.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
+    'AECNORM': (18, 19, 'float', 16, 0.25, 'rw', 'Limit on norm of AEC filter coefficients'),
+    'AECPATHCHANGE': (18, 25, 'int', 1, 0, 'ro', 'AEC Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
+    'RT60': (18, 26, 'float', 0.9, 0.25, 'ro', 'Current RT60 estimate in seconds'),
+    'HPFONOFF': (18, 27, 'int', 3, 0, 'rw', 'High-pass Filter on microphone signals.', '0 = OFF', '1 = ON - 70 Hz cut-off', '2 = ON - 125 Hz cut-off', '3 = ON - 180 Hz cut-off'),
+    'RT60ONOFF': (18, 28, 'int', 1, 0, 'rw', 'RT60 Estimation for AES. 0 = OFF 1 = ON'),
+    'AECSILENCELEVEL': (18, 30, 'float', 1, 1e-09, 'rw', 'Threshold for signal detection in AEC [-inf .. 0] dBov (Default: -80dBov = 10log10(1x10-8))'),
+    'AECSILENCEMODE': (18, 31, 'int', 1, 0, 'ro', 'AEC far-end silence detection status. ', '0 = false (signal detected) ', '1 = true (silence detected)'),
+    'AGCONOFF': (19, 0, 'int', 1, 0, 'rw', 'Automatic Gain Control. ', '0 = OFF ', '1 = ON'),
+    'AGCMAXGAIN': (19, 1, 'float', 1000, 1, 'rw', 'Maximum AGC gain factor. ', '[0 .. 60] dB (default 30dB = 20log10(31.6))'),
+    'AGCDESIREDLEVEL': (19, 2, 'float', 0.99, 1e-08, 'rw', 'Target power level of the output signal. ', '[-inf .. 0] dBov (default: -23dBov = 10log10(0.005))'),
+    'AGCGAIN': (19, 3, 'float', 1000, 1, 'rw', 'Current AGC gain factor. ', '[0 .. 60] dB (default: 0.0dB = 20log10(1.0))'),
+    'AGCTIME': (19, 4, 'float', 1, 0.1, 'rw', 'Ramps-up / down time-constant in seconds.'),
+    'CNIONOFF': (19, 5, 'int', 1, 0, 'rw', 'Comfort Noise Insertion.', '0 = OFF', '1 = ON'),
+    'FREEZEONOFF': (19, 6, 'int', 1, 0, 'rw', 'Adaptive beamformer updates.', '0 = Adaptation enabled', '1 = Freeze adaptation, filter only'),
+    'STATNOISEONOFF': (19, 8, 'int', 1, 0, 'rw', 'Stationary noise suppression.', '0 = OFF', '1 = ON'),
+    'GAMMA_NS': (19, 9, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise. min .. max attenuation'),
+    'MIN_NS': (19, 10, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
+    'NONSTATNOISEONOFF': (19, 11, 'int', 1, 0, 'rw', 'Non-stationary noise suppression.', '0 = OFF', '1 = ON'),
+    'GAMMA_NN': (19, 12, 'float', 3, 0, 'rw', 'Over-subtraction factor of non- stationary noise. min .. max attenuation'),
+    'MIN_NN': (19, 13, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
+    'ECHOONOFF': (19, 14, 'int', 1, 0, 'rw', 'Echo suppression.', '0 = OFF', '1 = ON'),
+    'GAMMA_E': (19, 15, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (direct and early components). min .. max attenuation'),
+    'GAMMA_ETAIL': (19, 16, 'float', 3, 0, 'rw', 'Over-subtraction factor of echo (tail components). min .. max attenuation'),
+    'GAMMA_ENL': (19, 17, 'float', 5, 0, 'rw', 'Over-subtraction factor of non-linear echo. min .. max attenuation'),
+    'NLATTENONOFF': (19, 18, 'int', 1, 0, 'rw', 'Non-Linear echo attenuation.', '0 = OFF', '1 = ON'),
+    'NLAEC_MODE': (19, 20, 'int', 2, 0, 'rw', 'Non-Linear AEC training mode.', '0 = OFF', '1 = ON - phase 1', '2 = ON - phase 2'),
+    'SPEECHDETECTED': (19, 22, 'int', 1, 0, 'ro', 'Speech detection status.', '0 = false (no speech detected)', '1 = true (speech detected)'),
+    'FSBUPDATED': (19, 23, 'int', 1, 0, 'ro', 'FSB Update Decision.', '0 = false (FSB was not updated)', '1 = true (FSB was updated)'),
+    'FSBPATHCHANGE': (19, 24, 'int', 1, 0, 'ro', 'FSB Path Change Detection.', '0 = false (no path change detected)', '1 = true (path change detected)'),
+    'TRANSIENTONOFF': (19, 29, 'int', 1, 0, 'rw', 'Transient echo suppression.', '0 = OFF', '1 = ON'),
+    'VOICEACTIVITY': (19, 32, 'int', 1, 0, 'ro', 'VAD voice activity status.', '0 = false (no voice activity)', '1 = true (voice activity)'),
+    'STATNOISEONOFF_SR': (19, 33, 'int', 1, 0, 'rw', 'Stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
+    'NONSTATNOISEONOFF_SR': (19, 34, 'int', 1, 0, 'rw', 'Non-stationary noise suppression for ASR.', '0 = OFF', '1 = ON'),
+    'GAMMA_NS_SR': (19, 35, 'float', 3, 0, 'rw', 'Over-subtraction factor of stationary noise for ASR. ', '[0.0 .. 3.0] (default: 1.0)'),
+    'GAMMA_NN_SR': (19, 36, 'float', 3, 0, 'rw', 'Over-subtraction factor of non-stationary noise for ASR. ', '[0.0 .. 3.0] (default: 1.1)'),
+    'MIN_NS_SR': (19, 37, 'float', 1, 0, 'rw', 'Gain-floor for stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -16dB = 20log10(0.15))'),
+    'MIN_NN_SR': (19, 38, 'float', 1, 0, 'rw', 'Gain-floor for non-stationary noise suppression for ASR.', '[-inf .. 0] dB (default: -10dB = 20log10(0.3))'),
+    'GAMMAVAD_SR': (19, 39, 'float', 1000, 0, 'rw', 'Set the threshold for voice activity detection.', '[-inf .. 60] dB (default: 3.5dB 20log10(1.5))'),
+    # 'KEYWORDDETECT': (20, 0, 'int', 1, 0, 'ro', 'Keyword detected. Current value so needs polling.'),
+    'DOAANGLE': (21, 0, 'int', 359, 0, 'ro', 'DOA angle. Current value. Orientation depends on build configuration.')
+}
+
+
+class RespeakerInterface(object):
+    VENDOR_ID = 0x2886
+    PRODUCT_ID = 0x0018
+    TIMEOUT = 100000
+
+    def __init__(self):
+        self.dev = usb.core.find(idVendor=self.VENDOR_ID,
+                                 idProduct=self.PRODUCT_ID)
+        if not self.dev:
+            raise RuntimeError("Failed to find Respeaker device")
+        rospy.loginfo("Initializing Respeaker device")
+        try:
+            self.dev.reset()
+        except usb.core.USBError:
+            rospy.logerr(
+                "You may have to give the right permission on respeaker device. "
+                "Please run the command as followings to register udev rules.\n"
+                "$ roscd respeaker_ros \n"
+                "$ sudo cp -f $(rospack find respeaker_ros)/config/60-respeaker.rules /etc/udev/rules.d/60-respeaker.rules \n"
+                "$ sudo systemctl restart udev \n"
+                "You may find further details at https://github.com/jsk-ros-pkg/jsk_3rdparty/blob/master/respeaker_ros/README.md"
+            ) # NOQA
+            raise
+        self.pixel_ring = usb_pixel_ring_v2.PixelRing(self.dev)
+        self.set_led_think()
+        time.sleep(5)  # it will take 5 seconds to re-recognize as audio device
+        self.set_led_trace()
+        rospy.loginfo("Respeaker device initialized (Version: %s)" % self.version)
+
+    def __del__(self):
+        try:
+            self.close()
+        except:
+            pass
+        finally:
+            self.dev = None
+
+    def write(self, name, value):
+        try:
+            data = PARAMETERS[name]
+        except KeyError:
+            return
+
+        if data[5] == 'ro':
+            raise ValueError('{} is read-only'.format(name))
+
+        id = data[0]
+
+        # 4 bytes offset, 4 bytes value, 4 bytes type
+        if data[2] == 'int':
+            payload = struct.pack(b'iii', data[1], int(value), 1)
+        else:
+            payload = struct.pack(b'ifi', data[1], float(value), 0)
+
+        self.dev.ctrl_transfer(
+            usb.util.CTRL_OUT | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
+            0, 0, id, payload, self.TIMEOUT)
+
+    def read(self, name):
+        try:
+            data = PARAMETERS[name]
+        except KeyError:
+            return
+
+        id = data[0]
+
+        cmd = 0x80 | data[1]
+        if data[2] == 'int':
+            cmd |= 0x40
+
+        length = 8
+
+        try:
+            response = self.dev.ctrl_transfer(
+                usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
+                0, cmd, id, length, self.TIMEOUT)
+        except usb.core.USBError as e:
+            rospy.logerr(e)
+            rospy.signal_shutdown('Shutdown this node because of USBError')
+
+        if sys.version_info.major == 2:
+            response = struct.unpack(b'ii', response.tostring())
+        else:
+            response = struct.unpack(b'ii', response.tobytes())
+
+        if data[2] == 'int':
+            result = response[0]
+        else:
+            result = response[0] * (2.**response[1])
+
+        return result
+
+    def set_led_think(self):
+        self.pixel_ring.set_brightness(10)
+        self.pixel_ring.think()
+
+    def set_led_trace(self):
+        self.pixel_ring.set_brightness(20)
+        self.pixel_ring.trace()
+
+    def set_led_color(self, r, g, b, a):
+        self.pixel_ring.set_brightness(int(20 * a))
+        self.pixel_ring.set_color(r=int(r*255), g=int(g*255), b=int(b*255))
+
+    def set_vad_threshold(self, db):
+        self.write('GAMMAVAD_SR', db)
+
+    def is_voice(self):
+        return self.read('VOICEACTIVITY')
+
+    @property
+    def direction(self):
+        return self.read('DOAANGLE')
+
+    @property
+    def version(self):
+        return self.dev.ctrl_transfer(
+            usb.util.CTRL_IN | usb.util.CTRL_TYPE_VENDOR | usb.util.CTRL_RECIPIENT_DEVICE,
+            0, 0x80, 0, 1, self.TIMEOUT)[0]
+
+    def close(self):
+        """
+        close the interface
+        """
+        usb.util.dispose_resources(self.dev)
+
+
+class RespeakerAudio(object):
+    def __init__(self, on_audio, channel=0, suppress_error=True, sample_duration=None):
+        self.on_audio = on_audio
+        with ignore_stderr(enable=suppress_error):
+            self.pyaudio = pyaudio.PyAudio()
+        self.channels = None
+        self.channel = channel
+        self.device_index = None
+        self.rate = 16000
+        self.bitwidth = 2
+        self.bitdepth = 16
+        if sample_duration is not None:
+            frames_per_buffer = int(sample_duration * self.rate)
+        else:
+            frames_per_buffer = 1024
+
+        # find device
+        count = self.pyaudio.get_device_count()
+        rospy.logdebug("%d audio devices found" % count)
+        for i in range(count):
+            info = self.pyaudio.get_device_info_by_index(i)
+            name = info["name"].encode("utf-8")
+            chan = info["maxInputChannels"]
+            rospy.logdebug(" - %d: %s" % (i, name))
+            if name.lower().find(b"respeaker") >= 0:
+                self.channels = chan
+                self.device_index = i
+                rospy.loginfo("Found %d: %s (channels: %d)" % (i, name, chan))
+                break
+        if self.device_index is None:
+            rospy.logwarn("Failed to find respeaker device by name. Using default input")
+            info = self.pyaudio.get_default_input_device_info()
+            self.channels = info["maxInputChannels"]
+            self.device_index = info["index"]
+
+        if self.channels != 6:
+            rospy.logwarn("%d channel is found for respeaker" % self.channels)
+            rospy.logwarn("You may have to update firmware.")
+        self.channel = min(self.channels - 1, max(0, self.channel))
+
+        self.stream = self.pyaudio.open(
+            input=True, start=False,
+            format=pyaudio.paInt16,
+            channels=self.channels,
+            rate=self.rate,
+            frames_per_buffer=frames_per_buffer,
+            stream_callback=self.stream_callback,
+            input_device_index=self.device_index,
+        )
+
+    def __del__(self):
+        self.stop()
+        try:
+            self.stream.close()
+        except:
+            pass
+        finally:
+            self.stream = None
+        try:
+            self.pyaudio.terminate()
+        except:
+            pass
+
+    def stream_callback(self, in_data, frame_count, time_info, status):
+        # split channel
+        data = np.frombuffer(in_data, dtype=np.int16)
+        chunk_per_channel = int(len(data) / self.channels)
+        data = np.reshape(data, (chunk_per_channel, self.channels))
+        chan_data = data[:, self.channel]
+        # invoke callback
+        self.on_audio(chan_data.tobytes())
+        return None, pyaudio.paContinue
+
+    def start(self):
+        if self.stream.is_stopped():
+            self.stream.start_stream()
+
+    def stop(self):
+        if self.stream.is_active():
+            self.stream.stop_stream()
+
+