feat: add WebSocketOptions for configurable WebSocket connections

twangodev · twangodev · commit 5c1e168e23e2 · 2025-11-17T14:42:44.000-06:00
diff --git a/src/fishaudio/__init__.py b/src/fishaudio/__init__.py
@@ -29,6 +29,7 @@
 from ._version import __version__
 from .client import AsyncFishAudio, FishAudio
 from .core.iterators import AsyncAudioStream, AudioStream
+from .core.websocket_options import WebSocketOptions
 from .exceptions import (
     APIError,
     AuthenticationError,
@@ -41,7 +42,7 @@
     ValidationError,
     WebSocketError,
 )
-from .types import FlushEvent, TextEvent
+from .types import FlushEvent, ReferenceAudio, TextEvent, TTSConfig
 from .utils import play, save, stream
 
 # Main exports
@@ -56,8 +57,12 @@
     # Audio streams
     "AudioStream",
     "AsyncAudioStream",
+    # Configuration
+    "TTSConfig",
+    "WebSocketOptions",
     # Types
     "FlushEvent",
+    "ReferenceAudio",
     "TextEvent",
     # Exceptions
     "APIError",
diff --git a/src/fishaudio/core/__init__.py b/src/fishaudio/core/__init__.py
@@ -3,10 +3,12 @@
 from .client_wrapper import AsyncClientWrapper, ClientWrapper
 from .omit import OMIT
 from .request_options import RequestOptions
+from .websocket_options import WebSocketOptions
 
 __all__ = [
     "AsyncClientWrapper",
     "ClientWrapper",
     "OMIT",
     "RequestOptions",
+    "WebSocketOptions",
 ]
diff --git a/src/fishaudio/core/websocket_options.py b/src/fishaudio/core/websocket_options.py
@@ -0,0 +1,42 @@
+"""WebSocket-level options for WebSocket connections."""
+
+from typing import Any, Dict, Optional
+
+
+class WebSocketOptions:
+    """
+    Optional WebSocket connection settings; options left as None are not forwarded.
+
+    Attributes:
+        keepalive_ping_timeout_seconds: Maximum time to wait for a pong response
+            to a keepalive ping before considering the connection dead (default: 20s)
+        keepalive_ping_interval_seconds: Interval between keepalive pings (default: 20s)
+        max_message_size_bytes: Maximum size for incoming messages (default: 65,536 bytes)
+        queue_size: Size of the message receive queue (default: 512)
+    """
+
+    def __init__(
+        self,
+        *,
+        keepalive_ping_timeout_seconds: Optional[float] = None,
+        keepalive_ping_interval_seconds: Optional[float] = None,
+        max_message_size_bytes: Optional[int] = None,
+        queue_size: Optional[int] = None,
+    ):
+        self.keepalive_ping_timeout_seconds = keepalive_ping_timeout_seconds
+        self.keepalive_ping_interval_seconds = keepalive_ping_interval_seconds
+        self.max_message_size_bytes = max_message_size_bytes
+        self.queue_size = queue_size
+
+    def to_httpx_ws_kwargs(self) -> Dict[str, Any]:
+        """Return the non-None options as kwargs for httpx_ws aconnect_ws/connect_ws."""
+        kwargs = {}
+        if self.keepalive_ping_timeout_seconds is not None:
+            kwargs["keepalive_ping_timeout_seconds"] = self.keepalive_ping_timeout_seconds
+        if self.keepalive_ping_interval_seconds is not None:
+            kwargs["keepalive_ping_interval_seconds"] = self.keepalive_ping_interval_seconds
+        if self.max_message_size_bytes is not None:
+            kwargs["max_message_size_bytes"] = self.max_message_size_bytes
+        if self.queue_size is not None:
+            kwargs["queue_size"] = self.queue_size
+        return kwargs
diff --git a/src/fishaudio/resources/tts.py b/src/fishaudio/resources/tts.py
@@ -8,7 +8,7 @@
 from httpx_ws import AsyncWebSocketSession, WebSocketSession, aconnect_ws, connect_ws
 
 from .realtime import aiter_websocket_audio, iter_websocket_audio
-from ..core import AsyncClientWrapper, ClientWrapper, RequestOptions
+from ..core import AsyncClientWrapper, ClientWrapper, RequestOptions, WebSocketOptions
 from ..core.iterators import AsyncAudioStream, AudioStream
 from ..types import (
     AudioFormat,
@@ -215,6 +215,7 @@ def stream_websocket(
         config: TTSConfig = TTSConfig(),
         model: Model = "s1",
         max_workers: int = 10,
+        ws_options: Optional[WebSocketOptions] = None,
     ) -> Iterator[bytes]:
         """
         Stream text and receive audio in real-time via WebSocket.
@@ -305,6 +306,9 @@ def text_generator():
                 speed, base=config.prosody
             )
 
+        # WebSocket kwargs: only the options the caller set (empty when ws_options is None)
+        ws_kwargs = ws_options.to_httpx_ws_kwargs() if ws_options else {}
+
         executor = ThreadPoolExecutor(max_workers=max_workers)
 
         try:
@@ -316,6 +320,7 @@ def text_generator():
                     "model": model,
                     "Authorization": f"Bearer {self._client.api_key}",
                 },
+                **ws_kwargs,
             ) as ws:
 
                 def sender():
@@ -502,6 +507,7 @@ async def stream_websocket(
         speed: Optional[float] = None,
         config: TTSConfig = TTSConfig(),
         model: Model = "s1",
+        ws_options: Optional[WebSocketOptions] = None,
     ):
         """
         Stream text and receive audio in real-time via WebSocket (async).
@@ -591,11 +597,15 @@ async def text_generator():
                 speed, base=config.prosody
             )
 
+        # WebSocket kwargs: only the options the caller set (empty when ws_options is None)
+        ws_kwargs = ws_options.to_httpx_ws_kwargs() if ws_options else {}
+
         ws: AsyncWebSocketSession
         async with aconnect_ws(
             "/v1/tts/live",
             client=self._client.client,
             headers={"model": model, "Authorization": f"Bearer {self._client.api_key}"},
+            **ws_kwargs,
         ) as ws:
 
             async def sender():