Skip to content

Commit 245cfbc

Browse files
Merge pull request #307 from pollen-robotics/306-sounddevice-may-not-open-the-device-properly
bug #306: sounddevice fails if respeaker is selected in system default
2 parents f5d3420 + 60c9cbc commit 245cfbc

File tree

6 files changed

+42
-38
lines changed

6 files changed

+42
-38
lines changed

examples/debug/sound_record.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
OUTPUT_FILE = "recorded_audio.wav"
1414

1515

16-
def main(backend: str):
16+
def main(backend: str) -> None:
1717
"""Record audio for 5 seconds and save to a WAV file."""
1818
logging.basicConfig(
1919
level=logging.DEBUG, format="%(asctime)s [%(levelname)s] %(message)s"
@@ -28,7 +28,7 @@ def main(backend: str):
2828
sample = mini.media.get_audio_sample()
2929
if sample is not None:
3030
if backend == "gstreamer":
31-
sample = np.frombuffer(sample, dtype=np.int16).reshape(-1, 1)
31+
sample = np.frombuffer(sample, dtype=np.float32).reshape(-1, 1)
3232
audio_samples.append(sample)
3333
else:
3434
print("No audio data available yet...")

src/reachy_mini/media/audio_base.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ class AudioBackend(Enum):
2323
class AudioBase(ABC):
2424
"""Abstract class for opening and managing audio devices."""
2525

26+
SAMPLE_RATE = 16000 # respeaker samplerate
27+
2628
def __init__(self, backend: AudioBackend, log_level: str = "INFO") -> None:
2729
"""Initialize the audio device."""
2830
self.logger = logging.getLogger(__name__)
@@ -39,11 +41,6 @@ def get_audio_sample(self) -> Optional[bytes | npt.NDArray[np.float32]]:
3941
"""Read audio data from the device. Returns the data or None if error."""
4042
pass
4143

42-
@abstractmethod
43-
def get_audio_samplerate(self) -> int:
44-
"""Return the samplerate of the audio device."""
45-
pass
46-
4744
@abstractmethod
4845
def stop_recording(self) -> None:
4946
"""Close the audio device and release resources."""

src/reachy_mini/media/audio_gstreamer.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@ def __init__(self, log_level: str = "INFO") -> None:
3535
self._thread_bus_calls = Thread(target=lambda: self._loop.run(), daemon=True)
3636
self._thread_bus_calls.start()
3737

38-
self._samplerate = 24000
39-
4038
self._pipeline_record = Gst.Pipeline.new("audio_recorder")
4139
self._appsink_audio: Optional[GstApp] = None
4240
self._init_pipeline_record(self._pipeline_record)
@@ -56,7 +54,7 @@ def __init__(self, log_level: str = "INFO") -> None:
5654
def _init_pipeline_record(self, pipeline: Gst.Pipeline) -> None:
5755
self._appsink_audio = Gst.ElementFactory.make("appsink")
5856
caps = Gst.Caps.from_string(
59-
f"audio/x-raw,channels=1,rate={self._samplerate},format=S16LE"
57+
f"audio/x-raw,channels=2,rate={self.SAMPLE_RATE},format=F32LE"
6058
)
6159
self._appsink_audio.set_property("caps", caps)
6260
self._appsink_audio.set_property("drop", True) # avoid overflow
@@ -98,7 +96,7 @@ def _init_pipeline_playback(self, pipeline: Gst.Pipeline) -> None:
9896
self._appsrc.set_property("format", Gst.Format.TIME)
9997
self._appsrc.set_property("is-live", True)
10098
caps = Gst.Caps.from_string(
101-
f"audio/x-raw,format=F32LE,channels=1,rate={self._samplerate},layout=interleaved"
99+
f"audio/x-raw,format=F32LE,channels=1,rate={self.SAMPLE_RATE},layout=interleaved"
102100
)
103101
self._appsrc.set_property("caps", caps)
104102

@@ -136,7 +134,7 @@ def start_recording(self) -> None:
136134
"""Open the audio card using GStreamer."""
137135
self._pipeline_record.set_state(Gst.State.PLAYING)
138136

139-
def _get_sample(self, appsink: Gst.AppSink) -> Optional[bytes]:
137+
def _get_sample(self, appsink: GstApp.AppSink) -> Optional[bytes]:
140138
sample = appsink.try_pull_sample(20_000_000)
141139
if sample is None:
142140
return None
@@ -158,10 +156,6 @@ def get_audio_sample(self) -> Optional[bytes]:
158156
"""
159157
return self._get_sample(self._appsink_audio)
160158

161-
def get_audio_samplerate(self) -> int:
162-
"""Return the samplerate of the audio device."""
163-
return self._samplerate
164-
165159
def stop_recording(self) -> None:
166160
"""Release the camera resource."""
167161
self._pipeline_record.set_state(Gst.State.NULL)

src/reachy_mini/media/audio_sounddevice.py

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -30,17 +30,16 @@ def __init__(
3030
self.stream = None
3131
self._output_stream = None
3232
self._buffer: List[npt.NDArray[np.float32]] = []
33-
self._device_id = self.get_output_device_id("respeaker")
34-
self._samplerate = (
35-
-1
36-
) # will be set on first use to avoid issues if device is not present (CI)
33+
self._output_device_id = self.get_output_device_id("respeaker")
34+
self._input_device_id = self.get_input_device_id("respeaker")
3735

3836
def start_recording(self) -> None:
3937
"""Open the audio input stream, using ReSpeaker card if available."""
4038
self.stream = sd.InputStream(
4139
blocksize=self.frames_per_buffer,
42-
device=self._device_id,
40+
device=self._input_device_id,
4341
callback=self._callback,
42+
samplerate=self.SAMPLE_RATE,
4443
)
4544
if self.stream is None:
4645
raise RuntimeError("Failed to open SoundDevice audio stream.")
@@ -69,14 +68,6 @@ def get_audio_sample(self) -> Optional[bytes | npt.NDArray[np.float32]]:
6968
self.logger.debug("No audio data available in buffer.")
7069
return None
7170

72-
def get_audio_samplerate(self) -> int:
73-
"""Return the samplerate of the audio device."""
74-
if self._samplerate == -1:
75-
self._samplerate = int(
76-
sd.query_devices(self._device_id)["default_samplerate"]
77-
)
78-
return self._samplerate
79-
8071
def stop_recording(self) -> None:
8172
"""Close the audio stream and release resources."""
8273
if self.stream is not None:
@@ -97,8 +88,8 @@ def push_audio_sample(self, data: bytes) -> None:
9788
def start_playing(self) -> None:
9889
"""Open the audio output stream."""
9990
self._output_stream = sd.OutputStream(
100-
samplerate=self.get_audio_samplerate(),
101-
device=self._device_id,
91+
samplerate=self.SAMPLE_RATE,
92+
device=self._output_device_id,
10293
channels=1,
10394
)
10495
if self._output_stream is None:
@@ -121,9 +112,9 @@ def play_sound(self, sound_file: str, autoclean: bool = False) -> None:
121112

122113
data, samplerate_in = sf.read(file_path, dtype="float32")
123114

124-
if samplerate_in != self.get_audio_samplerate():
115+
if samplerate_in != self.SAMPLE_RATE:
125116
data = scipy.signal.resample(
126-
data, int(len(data) * (self.get_audio_samplerate() / samplerate_in))
117+
data, int(len(data) * (self.SAMPLE_RATE / samplerate_in))
127118
)
128119
if data.ndim > 1: # convert to mono
129120
data = np.mean(data, axis=1)
@@ -157,8 +148,8 @@ def callback(
157148
event = threading.Event()
158149

159150
self._output_stream = sd.OutputStream(
160-
samplerate=self.get_audio_samplerate(),
161-
device=self._device_id,
151+
samplerate=self.SAMPLE_RATE,
152+
device=self._output_device_id,
162153
channels=1,
163154
callback=callback,
164155
finished_callback=event.set, # release the device when done
@@ -198,10 +189,32 @@ def get_output_device_id(self, name_contains: str) -> int:
198189
devices = sd.query_devices()
199190

200191
for idx, dev in enumerate(devices):
201-
if name_contains.lower() in dev["name"].lower():
192+
if (
193+
name_contains.lower() in dev["name"].lower()
194+
and dev["max_output_channels"] > 0
195+
):
202196
return idx
203197
# Return default output device if not found
204198
self.logger.warning(
205199
f"No output device found containing '{name_contains}', using default."
206200
)
207201
return int(sd.default.device[1])
202+
203+
def get_input_device_id(self, name_contains: str) -> int:
    """Return the id of the first input device whose name contains the given string.

    The name comparison is case-insensitive, and only devices that report at
    least one input channel are considered, so an output-only entry with a
    matching name is never returned.

    Args:
        name_contains: Substring to look for in the device name.

    Returns:
        The index of the first matching input device, or the index of the
        system default input device if no device matches.

    """
    needle = name_contains.lower()

    for idx, dev in enumerate(sd.query_devices()):
        if needle in dev["name"].lower() and dev["max_input_channels"] > 0:
            return idx

    # Return default input device if not found.
    self.logger.warning(
        f"No input device found containing '{name_contains}', using default."
    )
    # sd.default.device is an (input, output) pair: index 0 is the input
    # device. Index 1 would hand back the default *output* device.
    return int(sd.default.device[0])

src/reachy_mini/media/camera_gstreamer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def open(self) -> None:
9393
self._thread_bus_calls = Thread(target=self._handle_bus_calls, daemon=True)
9494
self._thread_bus_calls.start()
9595

96-
def _get_sample(self, appsink: Gst.AppSink) -> Optional[bytes]:
96+
def _get_sample(self, appsink: GstApp.AppSink) -> Optional[bytes]:
9797
sample = appsink.try_pull_sample(20_000_000)
9898
if sample is None:
9999
return None

src/reachy_mini/media/media_manager.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def get_audio_samplerate(self) -> int:
150150
if self.audio is None:
151151
self.logger.warning("Audio system is not initialized.")
152152
return -1
153-
return self.audio.get_audio_samplerate()
153+
return self.audio.SAMPLE_RATE
154154

155155
def stop_recording(self) -> None:
156156
"""Stop recording audio."""

0 commit comments

Comments
 (0)