Skip to content

Commit 937867d

Browse files
Merge pull request #246 from pollen-robotics/244-expose-direction-of-arrival
244 expose direction of arrival
2 parents 245cfbc + f2d0bd2 commit 937867d

File tree

5 files changed

+156
-1
lines changed

5 files changed

+156
-1
lines changed

examples/debug/sound_doa.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""Reachy Mini direction of arrival (DoA) example.
2+
3+
Read the sound direction of arrival from the microphone array and, when
4+
speech is detected, turn the head toward the speaker. This is a debug
5+
example: without the ReSpeaker USB device no direction is available.
6+
"""
7+
8+
import logging
9+
import time
10+
11+
import numpy as np
12+
13+
from reachy_mini import ReachyMini
14+
15+
16+
def main() -> None:
    """Turn the head toward the direction of arrival (DoA) of detected speech.

    Polls ``mini.media.audio.get_DoA()`` twice per second. When speech is
    detected and the angle moved by more than ``threshold`` since the last
    move, the direction is converted into a point in the head frame, rotated
    into the world frame and passed to ``look_at_world``.

    Runs until interrupted (e.g. Ctrl-C).
    """
    logging.basicConfig(
        level=logging.DEBUG, format="%(asctime)s [%(levelname)s] %(message)s"
    )

    # get_DoA() reports radians, so the threshold is in radians too
    # (0.004 rad is about 0.23 degrees).
    threshold = 0.004
    poll_period = 0.5  # seconds between two DoA reads

    with ReachyMini(log_level="DEBUG", automatic_body_yaw=True) as mini:
        # None until the head has been moved once, so the first detected
        # speech always triggers a move.
        last_doa = None
        while True:
            doa = mini.media.audio.get_DoA()
            print(f"DOA: {doa}")

            # get_DoA() returns None when the ReSpeaker device is missing;
            # indexing it would raise TypeError.
            if doa is None:
                print(" DoA unavailable")
                time.sleep(poll_period)
                continue

            angle, speech_detected = doa
            moved_enough = last_doa is None or np.abs(angle - last_doa) > threshold

            if not speech_detected:
                print(" No speech detected")
            elif not moved_enough:
                print(
                    f" Small change in DOA: {np.degrees(angle):.1f}° "
                    f"(last was {np.degrees(last_doa):.1f}°). Not moving."
                )
            else:
                # The angle is in radians; convert only for display.
                print(f" Speech detected at {np.degrees(angle):.1f}°")
                p_head = [np.sin(angle), np.cos(angle), 0.0]
                print(
                    f" Pointing to x={p_head[0]:.2f}, y={p_head[1]:.2f}, z={p_head[2]:.2f}"
                )
                # Only the rotation part of the head pose is applied, so
                # p_world is a direction expressed in world axes.
                T_world_head = mini.get_current_head_pose()
                R_world_head = T_world_head[:3, :3]
                p_world = R_world_head @ p_head
                print(
                    f" In world coordinates: x={p_world[0]:.2f}, y={p_world[1]:.2f}, z={p_world[2]:.2f}"
                )
                mini.look_at_world(*p_world, duration=0.5)
                last_doa = angle

            time.sleep(poll_period)
50+
51+
52+
if __name__ == "__main__":
    # main() loops forever; Ctrl-C is the expected way to stop the example,
    # so the interrupt is reported instead of showing a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("Exiting...")

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ dependencies = [
2828
"asgiref",
2929
"aiohttp",
3030
"log-throttling==0.0.3",
31+
"pyusb>=1.2.1",
3132
]
3233

3334

src/reachy_mini/media/audio_base.py

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
"""
66

77
import logging
8+
import struct
89
from abc import ABC, abstractmethod
910
from enum import Enum
10-
from typing import Optional
11+
from typing import List, Optional
1112

1213
import numpy as np
1314
import numpy.typing as npt
15+
import usb
1416

1517

1618
class AudioBackend(Enum):
@@ -24,12 +26,26 @@ class AudioBase(ABC):
2426
"""Abstract class for opening and managing audio devices."""
2527

2628
SAMPLE_RATE = 16000 # respeaker samplerate
29+
TIMEOUT = 100000
30+
PARAMETERS = {
31+
"VERSION": (48, 0, 4, "ro", "uint8"),
32+
"AEC_AZIMUTH_VALUES": (33, 75, 16 + 1, "ro", "radians"),
33+
"DOA_VALUE": (20, 18, 4 + 1, "ro", "uint16"),
34+
"DOA_VALUE_RADIANS": (20, 19, 8 + 1, "ro", "radians"),
35+
}
2736

2837
def __init__(self, backend: AudioBackend, log_level: str = "INFO") -> None:
    """Set up logging, remember the backend and look for the ReSpeaker.

    Args:
        backend: Audio backend this device is driven by.
        log_level: Level applied to this module's logger.

    """
    # The logger is configured first so it is available to the USB lookup.
    self.logger = module_logger = logging.getLogger(__name__)
    module_logger.setLevel(log_level)

    self.backend = backend

    # Holds the result of the ReSpeaker USB lookup. Entries of PARAMETERS
    # used to query it are laid out as (resid, cmdid, length, access, type).
    self._respeaker = self._init_respeaker_usb()
44+
45+
def __del__(self) -> None:
    """Destructor to ensure resources are released."""
    # __del__ also runs on objects whose __init__ raised before
    # `_respeaker` was assigned, so the attribute may be missing.
    respeaker = getattr(self, "_respeaker", None)
    if respeaker:
        usb.util.dispose_resources(respeaker)
3349

3450
@abstractmethod
3551
def start_recording(self) -> None:
@@ -70,3 +86,72 @@ def play_sound(self, sound_file: str) -> None:
7086
7187
"""
7288
pass
89+
90+
def _init_respeaker_usb(self) -> Optional[usb.core.Device]:
    """Look up the ReSpeaker USB device.

    Returns:
        The device matching vendor id 0x2886 / product id 0x001A, or None
        when no such device is connected or no USB backend is available.

    """
    try:
        # find() returns None when nothing matches.
        return usb.core.find(idVendor=0x2886, idProduct=0x001A)
    except usb.core.NoBackendError as err:
        # Raised when pyusb cannot load a backend library. The rest of the
        # class already treats a missing device as None, so do the same here
        # instead of failing construction.
        self.logger.warning(f"No USB backend available: {err}")
        return None
96+
97+
def _read_usb(self, name: str) -> Optional[List[int] | List[float]]:
    """Read one parameter from the ReSpeaker with a USB vendor control transfer.

    Args:
        name: Key of the parameter in ``PARAMETERS``.

    Returns:
        For ``"radians"`` parameters, the little-endian 32-bit floats that
        follow the first response byte. For ``"uint8"`` and ``"uint16"``
        parameters, the raw response bytes as a list of ints (first byte
        included, nothing decoded). None when the parameter is unknown, the
        device is missing, the transfer fails, the response is too short or
        the parameter type is not handled.

    """
    spec = self.PARAMETERS.get(name)
    if spec is None:
        self.logger.error(f"Unknown parameter: {name}")
        return None

    if not self._respeaker:
        self.logger.warning("ReSpeaker device not found.")
        return None

    resid, cmd, length, _access, value_type = spec

    try:
        response = self._respeaker.ctrl_transfer(
            usb.util.CTRL_IN
            | usb.util.CTRL_TYPE_VENDOR
            | usb.util.CTRL_RECIPIENT_DEVICE,
            0,
            0x80 | cmd,  # presumably the "read" flag of the protocol - confirm
            resid,
            length,
            self.TIMEOUT,
        )
    except usb.core.USBError as err:
        # Timeout, unplugged device, missing permissions, ...: report "no
        # value" like the other failure paths instead of raising.
        self.logger.error(f"USB read of {name} failed: {err}")
        return None

    self.logger.debug(f"Response for {name}: {response}")

    if value_type in ("uint8", "uint16"):
        # NOTE(review): uint16 values are returned as raw bytes, exactly as
        # for uint8; confirm whether callers expect decoded 16-bit values.
        return response.tolist()

    if value_type == "radians":
        # The first byte is skipped; the remaining bytes are float32 values.
        count = (length - 1) // 4
        payload = response.tobytes()[1 : 1 + 4 * count]
        if len(payload) < 4 * count:
            self.logger.error(
                f"Short response for {name}: got {len(payload)} payload bytes, "
                f"expected {4 * count}"
            )
            return None
        return [float(value) for value in struct.unpack(f"<{count}f", payload)]

    return None
141+
142+
def get_DoA(self) -> tuple[float, bool] | None:
143+
"""Get the Direction of Arrival (DoA) value from the ReSpeaker device.
144+
145+
0° is left, 90° is front/back, 180° is right
146+
147+
Returns:
148+
tuple: A tuple containing the DoA value as an integer and the speech detection, or None if the device is not found.
149+
150+
"""
151+
if not self._respeaker:
152+
self.logger.warning("ReSpeaker device not found.")
153+
return None
154+
result = self._read_usb("DOA_VALUE_RADIANS")
155+
if result is None:
156+
return None
157+
return float(result[0]), bool(result[1])

src/reachy_mini/media/audio_gstreamer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ def _init_pipeline_record(self, pipeline: Gst.Pipeline) -> None:
8787

8888
def __del__(self) -> None:
8989
"""Destructor to ensure gstreamer resources are released."""
90+
super().__del__()
9091
self._loop.quit()
9192
self._bus_record.remove_watch()
9293
self._bus_playback.remove_watch()

tests/test_audio.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,18 @@ def test_record_audio_and_file_exists():
3838
os.remove(tmpfile.name)
3939
# print(f"Recorded audio saved to {tmpfile.name}")
4040

41+
@pytest.mark.audio
def test_DoA():
    """Test Direction of Arrival (DoA) estimation.

    Needs the audio hardware: get_DoA() returns None without the ReSpeaker.
    """
    media = MediaManager(backend=MediaBackend.DEFAULT_NO_VIDEO)
    doa = media.audio.get_DoA()
    assert doa is not None
    assert isinstance(doa, tuple)
    assert len(doa) == 2
    angle, speech_detected = doa
    # get_DoA() returns the angle in radians as a float, not an int.
    assert isinstance(angle, float)
    assert isinstance(speech_detected, bool)
51+
52+
4153
'''
4254
@pytest.mark.audio_gstreamer
4355
def test_play_sound_gstreamer_backend():

0 commit comments

Comments
 (0)