Merge pull request #246 from pollen-robotics/244-expose-direction-of-arrival

FabienDanieau · web-flow · commit 937867df371e · 2025-10-17T13:45:07.000+02:00
244 expose direction of arrival
diff --git a/examples/debug/sound_doa.py b/examples/debug/sound_doa.py
@@ -0,0 +1,56 @@
+"""Reachy Mini sound direction of arrival (DoA) example.
+
+Poll the audio device for the direction of arrival of speech and turn the head
+towards it whenever the direction changes by more than a small threshold.
+This is a debug example that loops until it is interrupted.
+"""
+
+import logging
+import time
+
+import numpy as np
+
+from reachy_mini import ReachyMini
+
+
+def main() -> None:
+    """Turn the head towards the direction of arrival (DoA) of speech.
+
+    Polls the audio device for the DoA and, each time speech is detected and
+    the angle moved by more than ``THRESHOLD``, looks at the matching point.
+    Loops until interrupted.
+    """
+    logging.basicConfig(
+        level=logging.DEBUG, format="%(asctime)s [%(levelname)s] %(message)s"
+    )
+
+    with ReachyMini(log_level="DEBUG", automatic_body_yaw=True) as mini:
+        last_doa = -1.0
+        # get_DoA() reports radians: 0.004 rad is about 0.23 degrees.
+        # NOTE(review): this used to be labelled "~2 degrees" (about 0.035 rad);
+        # confirm which value is intended.
+        THRESHOLD = 0.004
+        while True:
+            doa = mini.media.audio.get_DoA()
+            print(f"DOA: {doa}")
+            if doa is None:
+                # get_DoA() returns None when no ReSpeaker device is available.
+                print("  No DoA available")
+                time.sleep(0.5)
+                continue
+
+            angle, speech_detected = doa
+            if speech_detected and np.abs(angle - last_doa) > THRESHOLD:
+                print(f"  Speech detected at {np.degrees(angle):.1f}°")
+                # Unit vector for the angle: 0 maps to +y and pi/2 to +x.
+                p_head = [np.sin(angle), np.cos(angle), 0.0]
+                print(
+                    f"  Pointing to x={p_head[0]:.2f}, y={p_head[1]:.2f}, z={p_head[2]:.2f}"
+                )
+                # Only the rotation part of the head pose is applied.
+                T_world_head = mini.get_current_head_pose()
+                R_world_head = T_world_head[:3, :3]
+                p_world = R_world_head @ p_head
+                print(
+                    f"  In world coordinates: x={p_world[0]:.2f}, y={p_world[1]:.2f}, z={p_world[2]:.2f}"
+                )
+                mini.look_at_world(*p_world, duration=0.5)
+                last_doa = angle
+            else:
+                if not speech_detected:
+                    print("  No speech detected")
+                else:
+                    print(
+                        f"  Small change in DOA: {np.degrees(angle):.1f}° "
+                        f"(last was {np.degrees(last_doa):.1f}°). Not moving."
+                    )
+                time.sleep(0.5)
+
+
+if __name__ == "__main__":
+    # main() loops forever, so Ctrl+C is the expected way to stop the example;
+    # catch it to exit with a short message instead of a traceback.
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("Exiting...")
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,6 +28,7 @@ dependencies = [
     "asgiref",
     "aiohttp",
     "log-throttling==0.0.3",
+    "pyusb>=1.2.1",
 ]
 
 
diff --git a/src/reachy_mini/media/audio_base.py b/src/reachy_mini/media/audio_base.py
@@ -5,12 +5,14 @@
 """
 
 import logging
+import struct
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import Optional
+from typing import List, Optional
 
 import numpy as np
 import numpy.typing as npt
+import usb
 
 
 class AudioBackend(Enum):
@@ -24,12 +26,26 @@ class AudioBase(ABC):
     """Abstract class for opening and managing audio devices."""
 
     SAMPLE_RATE = 16000  # respeaker samplerate
+    # Timeout passed to the USB control transfer in _read_usb()
+    # (pyusb expresses timeouts in milliseconds).
+    TIMEOUT = 100000
+    # ReSpeaker parameters readable through _read_usb(), keyed by name.
+    # Each entry is (resid, cmdid, length in bytes, access, value type);
+    # _read_usb() uses every field except access.
+    # NOTE(review): "ro" presumably means read-only - confirm.
+    # For "radians" entries the length is 4 bytes per float plus one leading
+    # byte that _read_usb() skips when decoding.
+    PARAMETERS = {
+        "VERSION": (48, 0, 4, "ro", "uint8"),
+        "AEC_AZIMUTH_VALUES": (33, 75, 16 + 1, "ro", "radians"),
+        "DOA_VALUE": (20, 18, 4 + 1, "ro", "uint16"),
+        "DOA_VALUE_RADIANS": (20, 19, 8 + 1, "ro", "radians"),
+    }
 
     def __init__(self, backend: AudioBackend, log_level: str = "INFO") -> None:
         """Initialize the audio device."""
         self.logger = logging.getLogger(__name__)
         self.logger.setLevel(log_level)
         self.backend = backend
+        # Handle to the ReSpeaker USB device, or None when no matching device
+        # was found; _read_usb() and get_DoA() check it before use.
+        self._respeaker = self._init_respeaker_usb()
+        # PARAMETERS entries are (resid, cmdid, length, access, type), keyed by name.
+
+    def __del__(self) -> None:
+        """Release the USB resources held for the ReSpeaker, if any."""
+        # __del__ also runs when __init__ raised before _respeaker was assigned,
+        # so the attribute may be missing; never raise from a destructor.
+        respeaker = getattr(self, "_respeaker", None)
+        if respeaker is not None:
+            usb.util.dispose_resources(respeaker)
 
     @abstractmethod
     def start_recording(self) -> None:
@@ -70,3 +86,72 @@ def play_sound(self, sound_file: str) -> None:
 
         """
         pass
+
+    def _init_respeaker_usb(self) -> Optional[usb.core.Device]:
+        """Look up the ReSpeaker USB device.
+
+        Returns:
+            The device matching the vendor/product IDs used for the ReSpeaker, or
+            None when no such device or no USB backend is available.
+
+        """
+        try:
+            dev = usb.core.find(idVendor=0x2886, idProduct=0x001A)
+        except usb.core.NoBackendError:
+            # pyusb raises this when no libusb backend can be loaded. Audio
+            # playback/recording must still work, only DoA is unavailable.
+            self.logger.warning("No USB backend available, DoA is disabled.")
+            return None
+        return dev or None
+
+    def _read_usb(self, name: str) -> Optional[List[int] | List[float]]:
+        """Read one parameter from the ReSpeaker over a USB control transfer.
+
+        Args:
+            name: Key of the parameter in ``PARAMETERS``.
+
+        Returns:
+            A list of floats for "radians" parameters, the raw response bytes as
+            a list of ints for "uint8" and "uint16" parameters, or None when the
+            parameter is unknown, the device is missing, the transfer fails or
+            the response is too short.
+
+        """
+        try:
+            resid, cmdid, length, _access, value_type = self.PARAMETERS[name]
+        except KeyError:
+            self.logger.error(f"Unknown parameter: {name}")
+            return None
+
+        if not self._respeaker:
+            self.logger.warning("ReSpeaker device not found.")
+            return None
+
+        try:
+            response = self._respeaker.ctrl_transfer(
+                usb.util.CTRL_IN
+                | usb.util.CTRL_TYPE_VENDOR
+                | usb.util.CTRL_RECIPIENT_DEVICE,
+                0,
+                # NOTE(review): bit 7 presumably marks a read command - confirm.
+                0x80 | cmdid,
+                resid,
+                length,
+                self.TIMEOUT,
+            )
+        except usb.core.USBError as e:
+            # Timeouts or an unplugged device must not crash the caller.
+            self.logger.error(f"Failed to read {name}: {e}")
+            return None
+
+        self.logger.debug(f"Response for {name}: {response}")
+
+        if value_type == "radians":
+            # The first byte is skipped; the rest holds little-endian float32s.
+            count = (length - 1) // 4
+            payload = response.tobytes()[1 : 1 + 4 * count]
+            if len(payload) < 4 * count:
+                self.logger.error(
+                    f"Short response for {name}: got {len(response)} bytes"
+                )
+                return None
+            return [float(x) for x in struct.unpack(f"<{count}f", payload)]
+
+        if value_type in ("uint8", "uint16"):
+            # NOTE(review): "uint16" values are returned as raw bytes, exactly
+            # like "uint8"; they are not decoded into 16-bit integers.
+            return response.tolist()
+
+        return None
+
+    def get_DoA(self) -> tuple[float, bool] | None:
+        """Get the Direction of Arrival (DoA) value from the ReSpeaker device.
+
+        The angle is read from the "DOA_VALUE_RADIANS" parameter, so it is
+        expressed in radians: 0 is left, pi/2 is front/back, pi is right.
+
+        Returns:
+            tuple: A tuple containing the DoA angle as a float (radians) and the
+            speech detection flag as a bool, or None if the device is not found
+            or the value could not be read.
+
+        """
+        if not self._respeaker:
+            self.logger.warning("ReSpeaker device not found.")
+            return None
+        result = self._read_usb("DOA_VALUE_RADIANS")
+        if result is None:
+            return None
+        # The response holds two floats: the angle and the speech detection value.
+        return float(result[0]), bool(result[1])
diff --git a/src/reachy_mini/media/audio_gstreamer.py b/src/reachy_mini/media/audio_gstreamer.py
@@ -87,6 +87,7 @@ def _init_pipeline_record(self, pipeline: Gst.Pipeline) -> None:
 
     def __del__(self) -> None:
         """Destructor to ensure gstreamer resources are released."""
+        super().__del__()
         self._loop.quit()
         self._bus_record.remove_watch()
         self._bus_playback.remove_watch()
diff --git a/tests/test_audio.py b/tests/test_audio.py
@@ -38,6 +38,18 @@ def test_record_audio_and_file_exists():
     os.remove(tmpfile.name)
     # print(f"Recorded audio saved to {tmpfile.name}")
 
+@pytest.mark.audio
+def test_DoA():
+    """Test Direction of Arrival (DoA) estimation."""
+    media = MediaManager(backend=MediaBackend.DEFAULT_NO_VIDEO)
+    doa = media.audio.get_DoA()
+    assert doa is not None
+    assert isinstance(doa, tuple)
+    assert len(doa) == 2
+    # get_DoA() returns the angle as a float (radians), never as an int.
+    assert isinstance(doa[0], float)
+    assert isinstance(doa[1], bool)
+
+
 '''
 @pytest.mark.audio_gstreamer
 def test_play_sound_gstreamer_backend():

Original file line number	Diff line number	Diff line change
`@@ -28,6 +28,7 @@ dependencies = [`
`28`	`28`	`"asgiref",`
`29`	`29`	`"aiohttp",`
`30`	`30`	`"log-throttling==0.0.3",`
	`31`	`+ "pyusb>=1.2.1",`
`31`	`32`	`]`
`32`	`33`
`33`	`34`