Skip to content

Commit 05922de

Browse files
authored
Always chunk Wyoming TTS audio (home-assistant#156079)
1 parent 7675a44 commit 05922de

File tree

2 files changed

+30
-21
lines changed

2 files changed

+30
-21
lines changed

homeassistant/components/wyoming/assist_satellite.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
_PING_SEND_DELAY: Final = 2
5555
_PIPELINE_FINISH_TIMEOUT: Final = 1
5656
_TTS_SAMPLE_RATE: Final = 22050
57-
_ANNOUNCE_CHUNK_BYTES: Final = 2048 # 1024 samples
57+
_AUDIO_CHUNK_BYTES: Final = 2048 # 1024 samples
5858
_TTS_TIMEOUT_EXTRA: Final = 1.0
5959

6060
# Wyoming stage -> Assist stage
@@ -360,7 +360,7 @@ async def async_announce(self, announcement: AssistSatelliteAnnouncement) -> Non
360360
)
361361
assert proc.stdout is not None
362362
while True:
363-
chunk_bytes = await proc.stdout.read(_ANNOUNCE_CHUNK_BYTES)
363+
chunk_bytes = await proc.stdout.read(_AUDIO_CHUNK_BYTES)
364364
if not chunk_bytes:
365365
break
366366

@@ -782,17 +782,22 @@ async def _stream_tts(self, tts_result: tts.ResultStream) -> None:
782782
assert sample_width is not None
783783
assert sample_channels is not None
784784

785-
audio_chunk = AudioChunk(
786-
rate=sample_rate,
787-
width=sample_width,
788-
channels=sample_channels,
789-
audio=data_chunk,
790-
timestamp=timestamp,
791-
)
785+
data_chunk_idx = 0
786+
while data_chunk_idx < len(data_chunk):
787+
audio_chunk = AudioChunk(
788+
rate=sample_rate,
789+
width=sample_width,
790+
channels=sample_channels,
791+
audio=data_chunk[
792+
data_chunk_idx : data_chunk_idx + _AUDIO_CHUNK_BYTES
793+
],
794+
timestamp=timestamp,
795+
)
792796

793-
await self._client.write_event(audio_chunk.event())
794-
timestamp += audio_chunk.milliseconds
795-
total_seconds += audio_chunk.seconds
797+
await self._client.write_event(audio_chunk.event())
798+
timestamp += audio_chunk.milliseconds
799+
total_seconds += audio_chunk.seconds
800+
data_chunk_idx += _AUDIO_CHUNK_BYTES
796801

797802
await self._client.write_event(AudioStop(timestamp=timestamp).event())
798803
_LOGGER.debug("TTS streaming complete")

tests/components/wyoming/test_satellite.py

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ async def setup_config_entry(hass: HomeAssistant) -> MockConfigEntry:
5959
return entry
6060

6161

62-
def get_test_wav() -> bytes:
62+
def get_test_wav(chunk_copies: int = 1) -> bytes:
6363
"""Get bytes for test WAV file."""
6464
with io.BytesIO() as wav_io:
6565
with wave.open(wav_io, "wb") as wav_file:
@@ -68,7 +68,7 @@ def get_test_wav() -> bytes:
6868
wav_file.setnchannels(1)
6969

7070
# Single frame
71-
wav_file.writeframes(b"1234")
71+
wav_file.writeframes(b"1234" * chunk_copies)
7272

7373
return wav_io.getvalue()
7474

@@ -111,6 +111,7 @@ def __init__(
111111
self.tts_audio_chunk_event = asyncio.Event()
112112
self.tts_audio_stop_event = asyncio.Event()
113113
self.tts_audio_chunk: AudioChunk | None = None
114+
self.tts_audio_chunks: list[AudioChunk] = []
114115

115116
self.error_event = asyncio.Event()
116117
self.error: Error | None = None
@@ -169,6 +170,7 @@ async def write_event(self, event: Event):
169170
self.tts_audio_start_event.set()
170171
elif AudioChunk.is_type(event.type):
171172
self.tts_audio_chunk = AudioChunk.from_event(event)
173+
self.tts_audio_chunks.append(self.tts_audio_chunk)
172174
self.tts_audio_chunk_event.set()
173175
elif AudioStop.is_type(event.type):
174176
self.tts_audio_stop_event.set()
@@ -1537,7 +1539,7 @@ async def async_pipeline_from_audio_stream(
15371539
assert pipeline_kwargs.get("device_id") == device.device_id
15381540

15391541
# Send TTS info early
1540-
mock_tts_result_stream = MockResultStream(hass, "wav", get_test_wav())
1542+
mock_tts_result_stream = MockResultStream(hass, "wav", get_test_wav(1000))
15411543
pipeline_event_callback(
15421544
assist_pipeline.PipelineEvent(
15431545
assist_pipeline.PipelineEventType.RUN_START,
@@ -1604,12 +1606,14 @@ async def async_pipeline_from_audio_stream(
16041606
await mock_client.tts_audio_chunk_event.wait()
16051607
await mock_client.tts_audio_stop_event.wait()
16061608

1607-
# Verify audio chunk from test WAV
1608-
assert mock_client.tts_audio_chunk is not None
1609-
assert mock_client.tts_audio_chunk.rate == 22050
1610-
assert mock_client.tts_audio_chunk.width == 2
1611-
assert mock_client.tts_audio_chunk.channels == 1
1612-
assert mock_client.tts_audio_chunk.audio == b"1234"
1609+
# Verify audio chunks from test WAV
1610+
assert len(mock_client.tts_audio_chunks) == 2
1611+
chunk_sizes = (2048, 1952) # 1024 samples per chunk
1612+
for i, audio_chunk in enumerate(mock_client.tts_audio_chunks):
1613+
assert audio_chunk.rate == 22050
1614+
assert audio_chunk.width == 2
1615+
assert audio_chunk.channels == 1
1616+
assert len(audio_chunk.audio) == chunk_sizes[i]
16131617

16141618
# Text-to-speech text
16151619
pipeline_event_callback(

0 commit comments

Comments
 (0)