fix: increase elevenlabs websocket timeout (#1582)

jayeshp19 · web-flow · commit eaf112cc5f14 · 2025-03-02T14:53:22.000+05:30
diff --git a/.changeset/cold-walls-jam.md b/.changeset/cold-walls-jam.md
@@ -0,0 +1,5 @@
+---
+"livekit-plugins-deepgram": patch
+---
+
+set max session duration to 1 hour in deepgram connection pool
diff --git a/.changeset/large-ears-mate.md b/.changeset/large-ears-mate.md
@@ -0,0 +1,5 @@
+---
+"livekit-plugins-elevenlabs": patch
+---
+
+increase elevenlabs websocket inactivity timeout to 300 seconds by default
diff --git a/livekit-plugins/livekit-plugins-deepgram/livekit/plugins/deepgram/tts.py b/livekit-plugins/livekit-plugins-deepgram/livekit/plugins/deepgram/tts.py
@@ -86,6 +86,7 @@ def __init__(
         self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
             connect_cb=self._connect_ws,
             close_cb=self._close_ws,
+            max_session_duration=3600,  # 1 hour
         )
 
     async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:
diff --git a/livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/tts.py b/livekit-plugins/livekit-plugins-elevenlabs/livekit/plugins/elevenlabs/tts.py
@@ -87,6 +87,7 @@ class Voice:
 
 API_BASE_URL_V1 = "https://api.elevenlabs.io/v1"
 AUTHORIZATION_HEADER = "xi-api-key"
+WS_INACTIVITY_TIMEOUT = 300
 
 
 @dataclass
@@ -102,6 +103,7 @@ class _TTSOptions:
     word_tokenizer: tokenize.WordTokenizer
     chunk_length_schedule: list[int]
     enable_ssml_parsing: bool
+    inactivity_timeout: int
 
 
 class TTS(tts.TTS):
@@ -114,6 +116,7 @@ def __init__(
         base_url: str | None = None,
         encoding: TTSEncoding = "mp3_22050_32",
         streaming_latency: int = 3,
+        inactivity_timeout: int = WS_INACTIVITY_TIMEOUT,
         word_tokenizer: tokenize.WordTokenizer = tokenize.basic.WordTokenizer(
             ignore_punctuation=False  # punctuation can help for intonation
         ),
@@ -134,6 +137,7 @@ def __init__(
             base_url (str | None): Custom base URL for the API. Optional.
             encoding (TTSEncoding): Audio encoding format. Defaults to "mp3_22050_32".
             streaming_latency (int): Latency in seconds for streaming. Defaults to 3.
+            inactivity_timeout (int): Inactivity timeout in seconds for the websocket connection. Defaults to 300.
             word_tokenizer (tokenize.WordTokenizer): Tokenizer for processing text. Defaults to basic WordTokenizer.
             enable_ssml_parsing (bool): Enable SSML parsing for input text. Defaults to False.
             chunk_length_schedule (list[int]): Schedule for chunk lengths, ranging from 50 to 500. Defaults to [80, 120, 200, 260].
@@ -173,6 +177,7 @@ def __init__(
             chunk_length_schedule=chunk_length_schedule,
             enable_ssml_parsing=enable_ssml_parsing,
             language=language,
+            inactivity_timeout=inactivity_timeout,
         )
         self._session = http_session
         self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](
@@ -581,10 +586,11 @@ def _stream_url(opts: _TTSOptions) -> str:
     latency = opts.streaming_latency
     enable_ssml = str(opts.enable_ssml_parsing).lower()
     language = opts.language
+    inactivity_timeout = opts.inactivity_timeout
     url = (
         f"{base_url}/text-to-speech/{voice_id}/stream-input?"
         f"model_id={model_id}&output_format={output_format}&optimize_streaming_latency={latency}&"
-        f"enable_ssml_parsing={enable_ssml}"
+        f"enable_ssml_parsing={enable_ssml}&inactivity_timeout={inactivity_timeout}"
     )
     if language is not None:
         url += f"&language_code={language}"

-Original file line number
+Diff line change
@@ -0,0 +1,5 @@
 +---
 +"livekit-plugins-deepgram": patch
 +---
++
 +set max session duration to 1 hour in deepgram connection pool
Original file line number	Diff line number	Diff line change
`@@ -86,6 +86,7 @@ def __init__(`
`86`	`86`	`self._pool = utils.ConnectionPool[aiohttp.ClientWebSocketResponse](`
`87`	`87`	`connect_cb=self._connect_ws,`
`88`	`88`	`close_cb=self._close_ws,`
	`89`	`+ max_session_duration=3600, # 1 hour`
`89`	`90`	`)`
`90`	`91`
`91`	`92`	`async def _connect_ws(self) -> aiohttp.ClientWebSocketResponse:`