Skip to content

Commit 443d60a

Browse files
oliviamillerviambotlia-viam
authored
RSDK-12148 RSDK-12149 AudioIn and AudioOut wrappers (#1021)
Co-authored-by: viambot <[email protected]> Co-authored-by: Lia Stratopoulos <[email protected]>
1 parent 2a15499 commit 443d60a

File tree

18 files changed

+1157
-1
lines changed

18 files changed

+1157
-1
lines changed

examples/apis.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,5 +106,13 @@
106106
"func": "get_point_cloud_map",
107107
"packagePath": "viam.services",
108108
"importName": "SLAMClient"
109+
},
110+
"audio_in": {
111+
"func": "get_properties",
112+
"packagePath": "viam.components"
113+
},
114+
"audio_out": {
115+
"func": "get_properties",
116+
"packagePath": "viam.components"
109117
}
110118
}

examples/server/v1/components.py

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
from PIL import Image
1919

2020
from viam.components.arm import Arm
21+
from viam.components.audio_in import AudioIn, AudioResponse
2122
from viam.components.audio_input import AudioInput
23+
from viam.components.audio_out import AudioOut
2224
from viam.components.base import Base
2325
from viam.components.board import Board, TickStream
2426
from viam.components.camera import Camera
@@ -52,6 +54,8 @@
5254
from viam.proto.component.audioinput import AudioChunk, AudioChunkInfo, SampleFormat
5355
from viam.proto.component.encoder import PositionType
5456
from viam.utils import SensorReading
57+
from viam.proto.component.audioin import AudioChunk as AudioInChunk
58+
from viam.proto.common import AudioInfo
5559

5660
GEOMETRIES = [
5761
Geometry(center=Pose(x=1, y=2, z=3, o_x=2, o_y=3, o_z=4, theta=20), sphere=Sphere(radius_mm=2)),
@@ -173,6 +177,121 @@ async def get_geometries(self, extra: Optional[Dict[str, Any]] = None, **kwargs)
173177
return GEOMETRIES
174178

175179

180+
class ExampleAudioIn(AudioIn):
181+
def __init__(self, name: str):
182+
super().__init__(name)
183+
self.sample_rate = 44100
184+
self.num_channels = 2
185+
self.supported_codecs = ["pcm16"]
186+
self.chunk_count = 0
187+
self.latency = timedelta(milliseconds=20)
188+
self.volume_scale = 0.2
189+
self.frequency_hz = 440
190+
191+
async def get_audio(self, codec: str, duration_seconds: float, previous_timestamp_ns: int,
192+
*, timeout: Optional[float] = None, **kwargs) -> AudioIn.AudioStream:
193+
194+
async def read() -> AsyncIterator[AudioIn.AudioResponse]:
195+
# Generate chunks based on duration
196+
chunk_duration_ms = 100 # 100ms per chunk
197+
chunks_to_generate = max(1, int((duration_seconds * 1000) / chunk_duration_ms))
198+
199+
for i in range(chunks_to_generate):
200+
# Generate audio data (sine wave pattern)
201+
chunk_data = b""
202+
samples_per_chunk = int(self.sample_rate * (chunk_duration_ms / 1000))
203+
204+
for sample in range(samples_per_chunk):
205+
# Calculate the timing in seconds of this audio sample
206+
time_offset = (i * chunk_duration_ms / 1000) + (sample / self.sample_rate)
207+
# Generate one 16-bit PCM audio sample for a sine wave
208+
# 32767 scales the value from (-1,1) to full 16 bit signed range (-32768,32767)
209+
amplitude = int(32767 * self.volume_scale * math.sin(2 * math.pi * self.frequency_hz * time_offset))
210+
211+
# Convert to 16-bit PCM stereo
212+
sample_bytes = amplitude.to_bytes(2, byteorder='little', signed=True)
213+
chunk_data += sample_bytes * self.num_channels
214+
215+
chunk_start_time = previous_timestamp_ns + (i * chunk_duration_ms * 1000000) # Convert ms to ns
216+
chunk_end_time = chunk_start_time + (chunk_duration_ms * 1000000)
217+
218+
audio_chunk = AudioInChunk(
219+
audio_data=bytes(chunk_data),
220+
audio_info=AudioInfo(
221+
codec=codec,
222+
sample_rate_hz=int(self.sample_rate),
223+
num_channels=self.num_channels
224+
),
225+
sequence=i,
226+
start_timestamp_nanoseconds=chunk_start_time,
227+
end_timestamp_nanoseconds=chunk_end_time
228+
)
229+
audio_response = AudioResponse(audio=audio_chunk)
230+
yield audio_response
231+
232+
await asyncio.sleep(self.latency.total_seconds())
233+
234+
return StreamWithIterator(read())
235+
236+
async def get_properties(self, *, timeout: Optional[float] = None, **kwargs) -> AudioIn.Properties:
237+
"""Return the audio input device properties."""
238+
return AudioIn.Properties(
239+
supported_codecs=self.supported_codecs,
240+
sample_rate_hz=self.sample_rate,
241+
num_channels=self.num_channels
242+
)
243+
244+
async def get_geometries(self, extra: Optional[Dict[str, Any]] = None, **kwargs) -> List[Geometry]:
245+
return GEOMETRIES
246+
247+
248+
class ExampleAudioOut(AudioOut):
249+
def __init__(self, name: str):
250+
super().__init__(name)
251+
self.sample_rate = 44100
252+
self.num_channels = 2
253+
self.supported_codecs = ["pcm16", "mp3", "wav"]
254+
self.volume = 1.0
255+
self.is_playing = False
256+
257+
async def play(self,
258+
data: bytes,
259+
info: Optional[AudioInfo] = None,
260+
*,
261+
extra: Optional[Dict[str, Any]] = None,
262+
timeout: Optional[float] = None,
263+
**kwargs) -> None:
264+
"""Play the given audio data."""
265+
266+
# Simulate playing audio
267+
self.is_playing = True
268+
if info:
269+
print(f"Playing audio: {len(data)} bytes, codec={info.codec}, "
270+
f"sample_rate={info.sample_rate_hz}, channels={info.num_channels}")
271+
else:
272+
print(f"Playing audio: {len(data)} bytes (no audio info provided)")
273+
274+
await asyncio.sleep(0.1)
275+
276+
self.is_playing = False
277+
278+
async def get_properties(self,
279+
*,
280+
extra: Optional[Dict[str, Any]] = None,
281+
timeout: Optional[float] = None,
282+
**kwargs) -> AudioOut.Properties:
283+
"""Return the audio output device properties."""
284+
285+
return AudioOut.Properties(
286+
supported_codecs=self.supported_codecs,
287+
sample_rate_hz=self.sample_rate,
288+
num_channels=self.num_channels
289+
)
290+
291+
async def get_geometries(self, extra: Optional[Dict[str, Any]] = None, **kwargs) -> List[Geometry]:
292+
return GEOMETRIES
293+
294+
176295
class ExampleBase(Base):
177296
def __init__(self, name: str):
178297
self.position = 0

examples/server/v1/server.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
from .components import (
99
ExampleAnalog,
1010
ExampleArm,
11+
ExampleAudioIn,
1112
ExampleAudioInput,
13+
ExampleAudioOut,
1214
ExampleBase,
1315
ExampleBoard,
1416
ExampleCamera,
@@ -30,6 +32,8 @@
3032
async def run(host: str, port: int, log_level: int):
3133
my_arm = ExampleArm("arm0")
3234
my_audio_input = ExampleAudioInput("audio_input0")
35+
my_audio_in = ExampleAudioIn("audio_in0")
36+
my_audio_out = ExampleAudioOut("audio_out0")
3337
my_base = ExampleBase("base0")
3438
my_board = ExampleBoard(
3539
name="board",
@@ -75,7 +79,9 @@ async def run(host: str, port: int, log_level: int):
7579
server = Server(
7680
resources=[
7781
my_arm,
82+
my_audio_in,
7883
my_audio_input,
84+
my_audio_out,
7985
my_base,
8086
my_board,
8187
my_camera,
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from viam.resource.registry import Registry, ResourceRegistration
2+
3+
from viam.proto.common import AudioInfo
4+
from viam.media.audio import AudioCodec
5+
from .audio_in import AudioIn
6+
from .client import AudioInClient
7+
from .service import AudioInRPCService
8+
9+
AudioResponse = AudioIn.AudioResponse
10+
11+
__all__ = [
12+
"AudioIn",
13+
"AudioResponse",
14+
"AudioInfo",
15+
"AudioCodec",
16+
]
17+
18+
Registry.register_api(
19+
ResourceRegistration(
20+
AudioIn,
21+
AudioInRPCService,
22+
lambda name, channel: AudioInClient(name, channel),
23+
)
24+
)
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import abc
2+
import sys
3+
from typing import Final, Optional
4+
5+
from viam.streams import Stream
6+
7+
from viam.proto.common import GetPropertiesResponse
8+
from viam.proto.component.audioin import GetAudioResponse
9+
from viam.resource.types import API, RESOURCE_NAMESPACE_RDK, RESOURCE_TYPE_COMPONENT
10+
11+
from ..component_base import ComponentBase
12+
13+
if sys.version_info >= (3, 10):
14+
from typing import TypeAlias
15+
else:
16+
from typing_extensions import TypeAlias
17+
18+
19+
class AudioIn(ComponentBase):
20+
"""AudioIn represents a component that can capture audio.
21+
22+
This acts as an abstract base class for any drivers representing specific
23+
audio input implementations. This cannot be used on its own. If the ``__init__()`` function is
24+
overridden, it must call the ``super().__init__()`` function.
25+
"""
26+
27+
API: Final = API( # pyright: ignore [reportIncompatibleVariableOverride]
28+
RESOURCE_NAMESPACE_RDK, RESOURCE_TYPE_COMPONENT, "audio_in"
29+
)
30+
31+
Properties: "TypeAlias" = GetPropertiesResponse
32+
AudioResponse: "TypeAlias" = GetAudioResponse
33+
AudioStream = Stream[AudioResponse]
34+
35+
36+
@abc.abstractmethod
37+
async def get_audio(self, codec: str,
38+
duration_seconds: float,
39+
previous_timestamp_ns:int,
40+
*, timeout: Optional[float] = None, **kwargs) -> AudioStream:
41+
"""
42+
Get a stream of audio from the device
43+
44+
::
45+
my_audio_in = AudioIn.from_robot(robot=machine, name="my_audio_in")
46+
47+
stream = await my_audio_in.get_audio(
48+
codec=AudioCodec.PCM16,
49+
duration_seconds=10.0,
50+
previous_timestamp_ns=0
51+
)
52+
53+
Args:
54+
codec (str): The desired codec of the returned audio data
55+
duration_seconds (float): duration of the stream. 0 = indefinite stream
56+
previous_timestamp_ns (int): starting timestamp in nanoseconds for recording continuity.
57+
Set to 0 to begin recording from the current time.
58+
Returns:
59+
AudioStream: stream of audio chunks.
60+
...
61+
"""
62+
63+
@abc.abstractmethod
64+
async def get_properties(self, *, timeout: Optional[float] = None, **kwargs) -> Properties:
65+
"""
66+
Get the audio device's properties
67+
68+
::
69+
my_audio_in = AudioIn.from_robot(robot=machine, name="my_audio_in")
70+
properties = await my_audio_in.get_properties()
71+
72+
Returns:
73+
Properties: The properties of the audio in device.
74+
...
75+
"""
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from typing import Any, Dict, List, Mapping, Optional
2+
import uuid
3+
4+
from grpclib.client import Channel
5+
6+
from viam.proto.component.audioin import GetAudioRequest, GetAudioResponse
7+
from viam.proto.common import (
8+
DoCommandRequest,
9+
DoCommandResponse,
10+
GetPropertiesRequest,
11+
Geometry)
12+
from grpclib.client import Stream as ClientStream
13+
from viam.proto.component.audioin import AudioInServiceStub
14+
from viam.resource.rpc_client_base import ReconfigurableResourceRPCClientBase
15+
from viam.streams import StreamWithIterator
16+
17+
from .audio_in import AudioIn
18+
from viam.utils import ValueTypes, dict_to_struct, get_geometries, struct_to_dict
19+
20+
21+
class AudioInClient(AudioIn, ReconfigurableResourceRPCClientBase):
22+
23+
def __init__(self, name: str, channel: Channel) -> None:
24+
self.channel = channel
25+
self.client = AudioInServiceStub(channel)
26+
super().__init__(name)
27+
28+
29+
async def get_audio(self,
30+
codec:str,
31+
duration_seconds: float,
32+
previous_timestamp_ns:int,
33+
*,
34+
extra: Optional[Dict[str, Any]] = None,
35+
**kwargs,
36+
):
37+
request = GetAudioRequest(name=self.name, codec = codec,
38+
duration_seconds=duration_seconds,
39+
previous_timestamp_nanoseconds = previous_timestamp_ns,
40+
request_id = str(uuid.uuid4()),
41+
extra=dict_to_struct(extra))
42+
async def read():
43+
md = kwargs.get("metadata", self.Metadata()).proto
44+
audio_stream: ClientStream[GetAudioRequest, GetAudioResponse]
45+
async with self.client.GetAudio.open(metadata=md) as audio_stream:
46+
try:
47+
await audio_stream.send_message(request, end=True)
48+
async for response in audio_stream:
49+
yield response
50+
except Exception as e:
51+
raise (e)
52+
53+
return StreamWithIterator(read())
54+
55+
56+
async def get_properties(
57+
self,
58+
*,
59+
timeout: Optional[float] = None,
60+
**kwargs,
61+
) -> AudioIn.Properties:
62+
md = kwargs.get("metadata", self.Metadata()).proto
63+
return await self.client.GetProperties(GetPropertiesRequest(name=self.name), timeout=timeout, metadata=md)
64+
65+
async def do_command(
66+
self,
67+
command: Mapping[str, ValueTypes],
68+
*,
69+
timeout: Optional[float] = None,
70+
**kwargs,
71+
) -> Mapping[str, ValueTypes]:
72+
md = kwargs.get("metadata", self.Metadata()).proto
73+
request = DoCommandRequest(name=self.name, command=dict_to_struct(command))
74+
response: DoCommandResponse = await self.client.DoCommand(request, timeout=timeout, metadata=md)
75+
return struct_to_dict(response.result)
76+
77+
async def get_geometries(self, *, extra: Optional[Dict[str, Any]] = None, timeout: Optional[float] = None, **kwargs) -> List[Geometry]:
78+
md = kwargs.get("metadata", self.Metadata())
79+
return await get_geometries(self.client, self.name, extra, timeout, md)

0 commit comments

Comments
 (0)