From 47f21cb3ce36f510557497d2ec0dd69bb11e305c Mon Sep 17 00:00:00 2001 From: James Hush Date: Thu, 5 Feb 2026 10:02:24 +0800 Subject: [PATCH 1/6] fix(smallwebrtc): respect audio_out_10ms_chunks parameter in RawAudioTrack The RawAudioTrack class was hardcoded to always produce 10ms audio frames regardless of the audio_out_10ms_chunks transport parameter. This caused firmware clients to receive 20ms chunks even when 40ms was configured. Changes: - Add num_10ms_chunks parameter to RawAudioTrack constructor - Update add_audio_bytes to chunk audio based on configured size - Update recv() to produce frames of the configured size - Pass audio_out_10ms_chunks from TransportParams when creating track --- .../transports/smallwebrtc/transport.py | 35 ++++++++++++------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/src/pipecat/transports/smallwebrtc/transport.py b/src/pipecat/transports/smallwebrtc/transport.py index 7e22fb00cd..4740496ff1 100644 --- a/src/pipecat/transports/smallwebrtc/transport.py +++ b/src/pipecat/transports/smallwebrtc/transport.py @@ -78,19 +78,24 @@ class RawAudioTrack(AudioStreamTrack): supporting queued audio data with proper synchronization. """ - def __init__(self, sample_rate): + def __init__(self, sample_rate: int, num_10ms_chunks: int = 1): """Initialize the raw audio track. Args: sample_rate: The audio sample rate in Hz. + num_10ms_chunks: Number of 10ms chunks per output frame (default 1). """ super().__init__() self._sample_rate = sample_rate + self._num_10ms_chunks = num_10ms_chunks self._samples_per_10ms = sample_rate * 10 // 1000 self._bytes_per_10ms = self._samples_per_10ms * 2 # 16-bit (2 bytes per sample) + # Calculate chunk size based on num_10ms_chunks + self._samples_per_chunk = self._samples_per_10ms * num_10ms_chunks + self._bytes_per_chunk = self._bytes_per_10ms * num_10ms_chunks self._timestamp = 0 self._start = time.time() - # Queue of (bytes, future), broken into 10ms sub chunks as needed + # Queue of (bytes, future), broken into configured chunk sizes as needed self._chunk_queue = deque() def add_audio_bytes(self, audio_bytes: bytes): @@ -103,17 +108,20 @@ def add_audio_bytes(self, audio_bytes: bytes): A Future that completes when the data is processed. Raises: - ValueError: If audio bytes are not a multiple of 10ms size. + ValueError: If audio bytes are not a multiple of the configured chunk size. """ - if len(audio_bytes) % self._bytes_per_10ms != 0: - raise ValueError("Audio bytes must be a multiple of 10ms size.") + if len(audio_bytes) % self._bytes_per_chunk != 0: + raise ValueError( + f"Audio bytes must be a multiple of {self._num_10ms_chunks * 10}ms size " + f"({self._bytes_per_chunk} bytes)." + ) future = asyncio.get_running_loop().create_future() - # Break input into 10ms chunks - for i in range(0, len(audio_bytes), self._bytes_per_10ms): - chunk = audio_bytes[i : i + self._bytes_per_10ms] + # Break input into configured chunk sizes + for i in range(0, len(audio_bytes), self._bytes_per_chunk): + chunk = audio_bytes[i : i + self._bytes_per_chunk] # Only the last chunk carries the future to be resolved once fully consumed - fut = future if i + self._bytes_per_10ms >= len(audio_bytes) else None + fut = future if i + self._bytes_per_chunk >= len(audio_bytes) else None self._chunk_queue.append((chunk, fut)) return future @@ -135,7 +143,7 @@ async def recv(self): if future and not future.done(): future.set_result(True) else: - chunk = bytes(self._bytes_per_10ms) # silence + chunk = bytes(self._bytes_per_chunk) # silence # Convert the byte data to an ndarray of int16 samples samples = np.frombuffer(chunk, dtype=np.int16) @@ -145,7 +153,7 @@ async def recv(self): frame.sample_rate = self._sample_rate frame.pts = self._timestamp frame.time_base = fractions.Fraction(1, self._sample_rate) - self._timestamp += self._samples_per_10ms + self._timestamp += self._samples_per_chunk return frame @@ -493,7 +501,10 @@ async def _handle_client_connected(self): self._video_input_track = self._webrtc_connection.video_input_track() self._screen_video_track = self._webrtc_connection.screen_video_input_track() if self._params.audio_out_enabled: - self._audio_output_track = RawAudioTrack(sample_rate=self._out_sample_rate) + self._audio_output_track = RawAudioTrack( + sample_rate=self._out_sample_rate, + num_10ms_chunks=self._params.audio_out_10ms_chunks, + ) self._webrtc_connection.replace_audio_track(self._audio_output_track) if self._params.video_out_enabled: From 33bcb4292c8ce882047821c670e49f088273f86b Mon Sep 17 00:00:00 2001 From: James Hush Date: Thu, 5 Feb 2026 10:03:53 +0800 Subject: [PATCH 2/6] Add changelog entry for PR #3645 --- changelog/3645.fixed.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog/3645.fixed.md diff --git a/changelog/3645.fixed.md b/changelog/3645.fixed.md new file mode 100644 index 0000000000..3bc4f4f5c9 --- /dev/null +++ b/changelog/3645.fixed.md @@ -0,0 +1 @@ +- Fixed `SmallWebRTCTransport` not respecting `TransportParams.audio_out_10ms_chunks` parameter. The transport was hardcoded to produce 10ms audio frames regardless of the configured chunk size. From f45f23fb1a437e1cb19c5c26d6df6f72be86c345 Mon Sep 17 00:00:00 2001 From: James Hush Date: Thu, 5 Feb 2026 10:08:55 +0800 Subject: [PATCH 3/6] Add unit tests for RawAudioTrack audio chunk size configuration --- tests/test_smallwebrtc_transport.py | 139 ++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 tests/test_smallwebrtc_transport.py diff --git a/tests/test_smallwebrtc_transport.py b/tests/test_smallwebrtc_transport.py new file mode 100644 index 0000000000..f47d873d75 --- /dev/null +++ b/tests/test_smallwebrtc_transport.py @@ -0,0 +1,139 @@ +# +# Copyright (c) 2024-2026, Daily +# +# SPDX-License-Identifier: BSD 2-Clause License +# + +import pytest + +from pipecat.transports.smallwebrtc.transport import RawAudioTrack + + +class TestRawAudioTrack: + """Tests for the RawAudioTrack class.""" + + def test_default_chunk_size_is_10ms(self): + """Test that default chunk size is 10ms (num_10ms_chunks=1).""" + sample_rate = 16000 + track = RawAudioTrack(sample_rate=sample_rate) + + # 10ms at 16kHz = 160 samples, 2 bytes per sample = 320 bytes + expected_bytes = int(sample_rate * 10 / 1000) * 2 + assert track._bytes_per_chunk == expected_bytes + assert track._bytes_per_chunk == 320 + + def test_custom_chunk_size_40ms(self): + """Test that num_10ms_chunks=4 produces 40ms chunks.""" + sample_rate = 16000 + track = RawAudioTrack(sample_rate=sample_rate, num_10ms_chunks=4) + + # 40ms at 16kHz = 640 samples, 2 bytes per sample = 1280 bytes + expected_bytes = int(sample_rate * 40 / 1000) * 2 + assert track._bytes_per_chunk == expected_bytes + assert track._bytes_per_chunk == 1280 + + def test_custom_chunk_size_20ms(self): + """Test that num_10ms_chunks=2 produces 20ms chunks.""" + sample_rate = 16000 + track = RawAudioTrack(sample_rate=sample_rate, num_10ms_chunks=2) + + # 20ms at 16kHz = 320 samples, 2 bytes per sample = 640 bytes + expected_bytes = int(sample_rate * 20 / 1000) * 2 + assert track._bytes_per_chunk == expected_bytes + assert track._bytes_per_chunk == 640 + + @pytest.mark.asyncio + async def test_add_audio_bytes_queues_correct_chunks(self): + """Test that add_audio_bytes breaks audio into correct chunk sizes.""" + sample_rate = 16000 + num_chunks = 4 # 40ms + track = RawAudioTrack(sample_rate=sample_rate, num_10ms_chunks=num_chunks) + + # Create 80ms of audio (2 chunks of 40ms each) + audio_bytes = bytes(track._bytes_per_chunk * 2) + track.add_audio_bytes(audio_bytes) + + # Should have exactly 2 chunks in the queue + assert len(track._chunk_queue) == 2 + + # Each chunk should be the correct size + chunk1, _ = track._chunk_queue[0] + chunk2, _ = track._chunk_queue[1] + assert len(chunk1) == track._bytes_per_chunk + assert len(chunk2) == track._bytes_per_chunk + + @pytest.mark.asyncio + async def test_add_audio_bytes_rejects_invalid_size(self): + """Test that add_audio_bytes rejects audio not a multiple of chunk size.""" + sample_rate = 16000 + track = RawAudioTrack(sample_rate=sample_rate, num_10ms_chunks=4) + + # Create audio that's not a multiple of 40ms chunk size + invalid_audio = bytes(track._bytes_per_chunk + 100) + + with pytest.raises(ValueError) as exc_info: + track.add_audio_bytes(invalid_audio) + + assert "40ms" in str(exc_info.value) + + @pytest.mark.asyncio + async def test_recv_returns_correct_frame_size(self): + """Test that recv() returns AudioFrames with correct sample count.""" + sample_rate = 16000 + num_chunks = 4 # 40ms + track = RawAudioTrack(sample_rate=sample_rate, num_10ms_chunks=num_chunks) + + # Add one 40ms chunk of audio + audio_bytes = bytes(track._bytes_per_chunk) + track.add_audio_bytes(audio_bytes) + + # Receive the frame + frame = await track.recv() + + # Frame should have correct number of samples (40ms worth) + expected_samples = int(sample_rate * 40 / 1000) # 640 samples + assert frame.samples == expected_samples + + @pytest.mark.asyncio + async def test_recv_silence_has_correct_size(self): + """Test that silence frames have correct size when queue is empty.""" + sample_rate = 16000 + num_chunks = 4 # 40ms + track = RawAudioTrack(sample_rate=sample_rate, num_10ms_chunks=num_chunks) + + # Don't add any audio - should get silence + frame = await track.recv() + + # Silence frame should have correct number of samples + expected_samples = int(sample_rate * 40 / 1000) # 640 samples + assert frame.samples == expected_samples + + @pytest.mark.asyncio + async def test_timestamp_advances_by_chunk_samples(self): + """Test that timestamp advances correctly based on chunk size.""" + sample_rate = 16000 + num_chunks = 4 # 40ms + track = RawAudioTrack(sample_rate=sample_rate, num_10ms_chunks=num_chunks) + + # Initial timestamp should be 0 + assert track._timestamp == 0 + + # Receive one frame (silence is fine) + await track.recv() + + # Timestamp should advance by samples_per_chunk + expected_samples = int(sample_rate * 40 / 1000) # 640 samples + assert track._timestamp == expected_samples + + def test_different_sample_rates(self): + """Test chunk size calculation at different sample rates.""" + test_cases = [ + (8000, 4, 640), # 8kHz, 40ms = 320 samples * 2 bytes = 640 bytes + (16000, 4, 1280), # 16kHz, 40ms = 640 samples * 2 bytes = 1280 bytes + (24000, 4, 1920), # 24kHz, 40ms = 960 samples * 2 bytes = 1920 bytes + (48000, 4, 3840), # 48kHz, 40ms = 1920 samples * 2 bytes = 3840 bytes + ] + + for sample_rate, num_chunks, expected_bytes in test_cases: + track = RawAudioTrack(sample_rate=sample_rate, num_10ms_chunks=num_chunks) + assert track._bytes_per_chunk == expected_bytes From 63083cd02157a59ab55be518c4fcd484b8f5bae6 Mon Sep 17 00:00:00 2001 From: James Hush Date: Thu, 5 Feb 2026 10:11:10 +0800 Subject: [PATCH 4/6] Add validation for num_10ms_chunks parameter Ensure num_10ms_chunks is a positive integer to prevent division by zero or invalid audio chunk sizes. Raises ValueError if value is less than 1. --- src/pipecat/transports/smallwebrtc/transport.py | 5 +++++ tests/test_smallwebrtc_transport.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/pipecat/transports/smallwebrtc/transport.py b/src/pipecat/transports/smallwebrtc/transport.py index 4740496ff1..9712df3b78 100644 --- a/src/pipecat/transports/smallwebrtc/transport.py +++ b/src/pipecat/transports/smallwebrtc/transport.py @@ -84,7 +84,12 @@ def __init__(self, sample_rate: int, num_10ms_chunks: int = 1): Args: sample_rate: The audio sample rate in Hz. num_10ms_chunks: Number of 10ms chunks per output frame (default 1). + + Raises: + ValueError: If num_10ms_chunks is not a positive integer. """ + if num_10ms_chunks < 1: + raise ValueError(f"num_10ms_chunks must be a positive integer, got {num_10ms_chunks}") super().__init__() self._sample_rate = sample_rate self._num_10ms_chunks = num_10ms_chunks diff --git a/tests/test_smallwebrtc_transport.py b/tests/test_smallwebrtc_transport.py index f47d873d75..1712ea1afa 100644 --- a/tests/test_smallwebrtc_transport.py +++ b/tests/test_smallwebrtc_transport.py @@ -137,3 +137,17 @@ def test_different_sample_rates(self): for sample_rate, num_chunks, expected_bytes in test_cases: track = RawAudioTrack(sample_rate=sample_rate, num_10ms_chunks=num_chunks) assert track._bytes_per_chunk == expected_bytes + + def test_invalid_num_10ms_chunks_zero(self): + """Test that num_10ms_chunks=0 raises ValueError.""" + with pytest.raises(ValueError) as exc_info: + RawAudioTrack(sample_rate=16000, num_10ms_chunks=0) + + assert "positive integer" in str(exc_info.value) + + def test_invalid_num_10ms_chunks_negative(self): + """Test that negative num_10ms_chunks raises ValueError.""" + with pytest.raises(ValueError) as exc_info: + RawAudioTrack(sample_rate=16000, num_10ms_chunks=-1) + + assert "positive integer" in str(exc_info.value) From 5d3aadadf1a39dd788a9c70f229ec8963fb0ad5a Mon Sep 17 00:00:00 2001 From: James Hush Date: Thu, 5 Feb 2026 10:12:45 +0800 Subject: [PATCH 5/6] Improve timestamp test to check frame pts difference --- tests/test_smallwebrtc_transport.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tests/test_smallwebrtc_transport.py b/tests/test_smallwebrtc_transport.py index 1712ea1afa..ad21e6ec17 100644 --- a/tests/test_smallwebrtc_transport.py +++ b/tests/test_smallwebrtc_transport.py @@ -115,15 +115,16 @@ async def test_timestamp_advances_by_chunk_samples(self): num_chunks = 4 # 40ms track = RawAudioTrack(sample_rate=sample_rate, num_10ms_chunks=num_chunks) - # Initial timestamp should be 0 - assert track._timestamp == 0 + # Receive first frame and check its timestamp + frame1 = await track.recv() + first_pts = frame1.pts - # Receive one frame (silence is fine) - await track.recv() + # Receive second frame + frame2 = await track.recv() - # Timestamp should advance by samples_per_chunk + # Timestamp should advance by samples_per_chunk between frames expected_samples = int(sample_rate * 40 / 1000) # 640 samples - assert track._timestamp == expected_samples + assert frame2.pts - first_pts == expected_samples def test_different_sample_rates(self): """Test chunk size calculation at different sample rates.""" From 6eb7da6a1b08581aab70701e3e43e73b109ed3f6 Mon Sep 17 00:00:00 2001 From: James Hush Date: Thu, 5 Feb 2026 10:27:21 +0800 Subject: [PATCH 6/6] Align smallwebrtc test with repo conventions (unittest + optional dep guard) Migrate from pytest-style to unittest.IsolatedAsyncioTestCase to match the pattern used by other transport tests (e.g. test_livekit_transport.py). Guard the aiortc/av import with try/except and skipUnless so tests gracefully skip when webrtc dependencies aren't installed. Add pyright suppressions for false positives inherent to testing internals of optional-dependency classes. Co-Authored-By: Claude Opus 4.5 --- tests/test_smallwebrtc_transport.py | 74 ++++++++++++++++++----------- 1 file changed, 45 insertions(+), 29 deletions(-) diff --git a/tests/test_smallwebrtc_transport.py b/tests/test_smallwebrtc_transport.py index ad21e6ec17..8fff11ac30 100644 --- a/tests/test_smallwebrtc_transport.py +++ b/tests/test_smallwebrtc_transport.py @@ -4,12 +4,29 @@ # SPDX-License-Identifier: BSD 2-Clause License # -import pytest +# pyright: reportConstantRedefinition=false +# pyright: reportPrivateUsage=false, reportUnknownMemberType=false +# pyright: reportUnknownArgumentType=false, reportUnknownVariableType=false +# pyright: reportOperatorIssue=false +# pyright: reportOptionalCall=false -from pipecat.transports.smallwebrtc.transport import RawAudioTrack +import unittest +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from pipecat.transports.smallwebrtc.transport import RawAudioTrack -class TestRawAudioTrack: +try: + from pipecat.transports.smallwebrtc.transport import RawAudioTrack + + WEBRTC_AVAILABLE = True +except (ImportError, Exception): + WEBRTC_AVAILABLE = False + RawAudioTrack = None # type: ignore[misc,assignment] + + +@unittest.skipUnless(WEBRTC_AVAILABLE, "webrtc dependencies not installed") +class TestRawAudioTrack(unittest.IsolatedAsyncioTestCase): """Tests for the RawAudioTrack class.""" def test_default_chunk_size_is_10ms(self): @@ -19,8 +36,8 @@ def test_default_chunk_size_is_10ms(self): # 10ms at 16kHz = 160 samples, 2 bytes per sample = 320 bytes expected_bytes = int(sample_rate * 10 / 1000) * 2 - assert track._bytes_per_chunk == expected_bytes - assert track._bytes_per_chunk == 320 + self.assertEqual(track._bytes_per_chunk, expected_bytes) + self.assertEqual(track._bytes_per_chunk, 320) def test_custom_chunk_size_40ms(self): """Test that num_10ms_chunks=4 produces 40ms chunks.""" @@ -29,8 +46,8 @@ def test_custom_chunk_size_40ms(self): # 40ms at 16kHz = 640 samples, 2 bytes per sample = 1280 bytes expected_bytes = int(sample_rate * 40 / 1000) * 2 - assert track._bytes_per_chunk == expected_bytes - assert track._bytes_per_chunk == 1280 + self.assertEqual(track._bytes_per_chunk, expected_bytes) + self.assertEqual(track._bytes_per_chunk, 1280) def test_custom_chunk_size_20ms(self): """Test that num_10ms_chunks=2 produces 20ms chunks.""" @@ -39,10 +56,9 @@ def test_custom_chunk_size_20ms(self): # 20ms at 16kHz = 320 samples, 2 bytes per sample = 640 bytes expected_bytes = int(sample_rate * 20 / 1000) * 2 - assert track._bytes_per_chunk == expected_bytes - assert track._bytes_per_chunk == 640 + self.assertEqual(track._bytes_per_chunk, expected_bytes) + self.assertEqual(track._bytes_per_chunk, 640) - @pytest.mark.asyncio async def test_add_audio_bytes_queues_correct_chunks(self): """Test that add_audio_bytes breaks audio into correct chunk sizes.""" sample_rate = 16000 @@ -54,15 +70,14 @@ async def test_add_audio_bytes_queues_correct_chunks(self): track.add_audio_bytes(audio_bytes) # Should have exactly 2 chunks in the queue - assert len(track._chunk_queue) == 2 + self.assertEqual(len(track._chunk_queue), 2) # Each chunk should be the correct size chunk1, _ = track._chunk_queue[0] chunk2, _ = track._chunk_queue[1] - assert len(chunk1) == track._bytes_per_chunk - assert len(chunk2) == track._bytes_per_chunk + self.assertEqual(len(chunk1), track._bytes_per_chunk) + self.assertEqual(len(chunk2), track._bytes_per_chunk) - @pytest.mark.asyncio async def test_add_audio_bytes_rejects_invalid_size(self): """Test that add_audio_bytes rejects audio not a multiple of chunk size.""" sample_rate = 16000 @@ -71,12 +86,11 @@ async def test_add_audio_bytes_rejects_invalid_size(self): # Create audio that's not a multiple of 40ms chunk size invalid_audio = bytes(track._bytes_per_chunk + 100) - with pytest.raises(ValueError) as exc_info: + with self.assertRaises(ValueError) as ctx: track.add_audio_bytes(invalid_audio) - assert "40ms" in str(exc_info.value) + self.assertIn("40ms", str(ctx.exception)) - @pytest.mark.asyncio async def test_recv_returns_correct_frame_size(self): """Test that recv() returns AudioFrames with correct sample count.""" sample_rate = 16000 @@ -92,9 +106,8 @@ async def test_recv_returns_correct_frame_size(self): # Frame should have correct number of samples (40ms worth) expected_samples = int(sample_rate * 40 / 1000) # 640 samples - assert frame.samples == expected_samples + self.assertEqual(frame.samples, expected_samples) - @pytest.mark.asyncio async def test_recv_silence_has_correct_size(self): """Test that silence frames have correct size when queue is empty.""" sample_rate = 16000 @@ -106,9 +119,8 @@ async def test_recv_silence_has_correct_size(self): # Silence frame should have correct number of samples expected_samples = int(sample_rate * 40 / 1000) # 640 samples - assert frame.samples == expected_samples + self.assertEqual(frame.samples, expected_samples) - @pytest.mark.asyncio async def test_timestamp_advances_by_chunk_samples(self): """Test that timestamp advances correctly based on chunk size.""" sample_rate = 16000 @@ -117,14 +129,14 @@ async def test_timestamp_advances_by_chunk_samples(self): # Receive first frame and check its timestamp frame1 = await track.recv() - first_pts = frame1.pts - # Receive second frame frame2 = await track.recv() # Timestamp should advance by samples_per_chunk between frames + self.assertIsNotNone(frame1.pts) + self.assertIsNotNone(frame2.pts) expected_samples = int(sample_rate * 40 / 1000) # 640 samples - assert frame2.pts - first_pts == expected_samples + self.assertEqual(frame2.pts - frame1.pts, expected_samples) def test_different_sample_rates(self): """Test chunk size calculation at different sample rates.""" @@ -137,18 +149,22 @@ def test_different_sample_rates(self): for sample_rate, num_chunks, expected_bytes in test_cases: track = RawAudioTrack(sample_rate=sample_rate, num_10ms_chunks=num_chunks) - assert track._bytes_per_chunk == expected_bytes + self.assertEqual(track._bytes_per_chunk, expected_bytes) def test_invalid_num_10ms_chunks_zero(self): """Test that num_10ms_chunks=0 raises ValueError.""" - with pytest.raises(ValueError) as exc_info: + with self.assertRaises(ValueError) as ctx: RawAudioTrack(sample_rate=16000, num_10ms_chunks=0) - assert "positive integer" in str(exc_info.value) + self.assertIn("positive integer", str(ctx.exception)) def test_invalid_num_10ms_chunks_negative(self): """Test that negative num_10ms_chunks raises ValueError.""" - with pytest.raises(ValueError) as exc_info: + with self.assertRaises(ValueError) as ctx: RawAudioTrack(sample_rate=16000, num_10ms_chunks=-1) - assert "positive integer" in str(exc_info.value) + self.assertIn("positive integer", str(ctx.exception)) + + +if __name__ == "__main__": + unittest.main()