Skip to content

Commit 8148beb

Browse files
Adriaan-ANT and claude committed
Fix WebSocket script to use free-tier compatible audio format
The script was using the pcm_44100 format, which requires the ElevenLabs Pro tier, causing WebSocket connections to close with error 1008.

Fixed by:
- Changed TTS_OUTPUT_FORMAT from pcm_44100 to mp3_44100_128 (free tier)
- Added pydub dependency for MP3 decoding
- Updated AudioQueue.add() to decode MP3 chunks before playback
- Enhanced WebSocket close handler to log error details
- Updated docstring to reflect MP3 format usage

The script now works with free tier ElevenLabs accounts.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>
1 parent fb5749c commit 8148beb

File tree

2 files changed

+20
-10
lines changed

2 files changed

+20
-10
lines changed

third_party/ElevenLabs/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,3 +5,4 @@ numpy>=1.26.0
55
scipy>=1.16.2
66
websocket-client>=1.8.0
77
python-dotenv>=1.0.0
8+
pydub>=0.25.1

third_party/ElevenLabs/stream_voice_assistant_websocket.py

Lines changed: 19 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
Key optimizations:
2727
- Text chunks sent to TTS immediately as they arrive from Claude
2828
- No sentence buffering required - audio generation begins instantly
29-
- PCM audio format eliminates MP3 encoding artifacts
29+
- MP3 audio format compatible with free tier accounts
3030
- Continuous audio streaming with pre-buffering prevents crackling
3131
"""
3232

@@ -43,6 +43,7 @@
4343
import sounddevice as sd
4444
import websocket
4545
from dotenv import load_dotenv
46+
from pydub import AudioSegment
4647
from scipy.io import wavfile
4748

4849
# Load environment variables from .env file
@@ -74,7 +75,7 @@
7475

7576
# TTS configuration
7677
TTS_MODEL_ID = "eleven_turbo_v2_5" # Fast, low-latency model
77-
TTS_OUTPUT_FORMAT = "pcm_44100" # PCM format eliminates MP3 encoding artifacts
78+
TTS_OUTPUT_FORMAT = "mp3_44100_128" # MP3 format (free tier compatible)
7879

7980

8081
class AudioQueue:
@@ -103,20 +104,27 @@ def __init__(self):
103104
self.read_position = 0
104105

105106
def add(self, audio_data):
106-
"""Add PCM audio chunk to the playback buffer.
107+
"""Add MP3 audio chunk to the playback buffer.
107108
108109
Args:
109-
audio_data: Raw PCM audio bytes (16-bit signed integers)
110+
audio_data: Raw MP3 audio bytes
110111
"""
111-
# Convert PCM int16 to float32
112-
samples = np.frombuffer(audio_data, dtype=np.int16)
112+
# Decode MP3 to PCM
113+
audio_segment = AudioSegment.from_mp3(io.BytesIO(audio_data))
114+
115+
# Convert to numpy array
116+
samples = np.array(audio_segment.get_array_of_samples(), dtype=np.int16)
113117
samples = samples.astype(np.float32) / 32768.0
114118

115119
if not self.playing:
116-
self.sample_rate = 44100
117-
self.channels = 1
120+
self.sample_rate = audio_segment.frame_rate
121+
self.channels = audio_segment.channels
118122

119-
samples = samples.reshape((-1, 1))
123+
# Reshape based on number of channels
124+
if self.channels > 1:
125+
samples = samples.reshape((-1, self.channels))
126+
else:
127+
samples = samples.reshape((-1, 1))
120128

121129
with self.buffer_lock:
122130
self.buffer.extend(samples.tobytes())
@@ -289,7 +297,8 @@ def on_error(ws, error):
289297

290298
def on_close(ws, close_status_code, close_msg):
291299
"""Handle WebSocket connection closure."""
292-
pass
300+
if close_status_code or close_msg:
301+
print(f"\nWebSocket closed with status {close_status_code}: {close_msg}")
293302

294303
def on_open(ws):
295304
nonlocal ws_connected

0 commit comments

Comments
 (0)