move audio mixer inside of MediaDevices for ease of playback

chenosaurus · chenosaurus · commit 846538f65b71 · 2025-10-07T15:40:11.000-07:00
diff --git a/examples/local_audio/full_duplex.py b/examples/local_audio/full_duplex.py
@@ -6,7 +6,7 @@
 from dotenv import load_dotenv, find_dotenv
 
 from livekit import api, rtc
-from db_meter import calculate_db_level, display_dual_db_meters
+from db_meter import calculate_db_level, display_single_db_meter
 
 
 async def main() -> None:
@@ -29,46 +29,16 @@ async def main() -> None:
     mic = devices.open_input()
     player = devices.open_output()
 
-    # Mixer for all remote audio streams
-    mixer = rtc.AudioMixer(sample_rate=48000, num_channels=1)
-
-    # dB level monitoring
+    # dB level monitoring (mic only)
     mic_db_queue = queue.Queue()
-    room_db_queue = queue.Queue()
-
-    # Track stream bookkeeping for cleanup
-    streams_by_pub: dict[str, rtc.AudioStream] = {}
-    streams_by_participant: dict[str, set[rtc.AudioStream]] = {}
-    
-    # remove stream from mixer and close it
-    async def _remove_stream(
-        stream: rtc.AudioStream, participant_sid: str | None = None, pub_sid: str | None = None
-    ) -> None:
-        try:
-            mixer.remove_stream(stream)
-        except Exception:
-            pass
-        try:
-            await stream.aclose()
-        except Exception:
-            pass
-        if participant_sid and participant_sid in streams_by_participant:
-            streams_by_participant.get(participant_sid, set()).discard(stream)
-            if not streams_by_participant.get(participant_sid):
-                streams_by_participant.pop(participant_sid, None)
-        if pub_sid is not None:
-            streams_by_pub.pop(pub_sid, None)
 
     def on_track_subscribed(
         track: rtc.Track,
         publication: rtc.RemoteTrackPublication,
         participant: rtc.RemoteParticipant,
     ):
         if track.kind == rtc.TrackKind.KIND_AUDIO:
-            stream = rtc.AudioStream(track, sample_rate=48000, num_channels=1)
-            streams_by_pub[publication.sid] = stream
-            streams_by_participant.setdefault(participant.sid, set()).add(stream)
-            mixer.add_stream(stream)
+            player.add_track(track)
             logging.info("subscribed to audio from %s", participant.identity)
 
     room.on("track_subscribed", on_track_subscribed)
@@ -78,37 +48,11 @@ def on_track_unsubscribed(
         publication: rtc.RemoteTrackPublication,
         participant: rtc.RemoteParticipant,
     ):
-        stream = streams_by_pub.get(publication.sid)
-        if stream is not None:
-            asyncio.create_task(_remove_stream(stream, participant.sid, publication.sid))
-            logging.info("unsubscribed from audio of %s", participant.identity)
+        asyncio.create_task(player.remove_track(track))
+        logging.info("unsubscribed from audio of %s", participant.identity)
 
     room.on("track_unsubscribed", on_track_unsubscribed)
 
-    def on_track_unpublished(
-        publication: rtc.RemoteTrackPublication, participant: rtc.RemoteParticipant
-    ):
-        stream = streams_by_pub.get(publication.sid)
-        if stream is not None:
-            asyncio.create_task(_remove_stream(stream, participant.sid, publication.sid))
-            logging.info("track unpublished: %s from %s", publication.sid, participant.identity)
-
-    room.on("track_unpublished", on_track_unpublished)
-
-    def on_participant_disconnected(participant: rtc.RemoteParticipant):
-        streams = list(streams_by_participant.pop(participant.sid, set()))
-        for stream in streams:
-            # Best-effort discover publication sid
-            pub_sid = None
-            for k, v in list(streams_by_pub.items()):
-                if v is stream:
-                    pub_sid = k
-                    break
-            asyncio.create_task(_remove_stream(stream, participant.sid, pub_sid))
-        logging.info("participant disconnected: %s", participant.identity)
-
-    room.on("participant_disconnected", on_participant_disconnected)
-
     token = (
         api.AccessToken(api_key, api_secret)
         .with_identity("local-audio")
@@ -135,31 +79,15 @@ def on_participant_disconnected(participant: rtc.RemoteParticipant):
 
         # Start dB meter display in a separate thread
         meter_thread = threading.Thread(
-            target=display_dual_db_meters,
-            args=(mic_db_queue, room_db_queue, room.name),
+            target=display_single_db_meter,
+            args=(mic_db_queue,),
+            kwargs={"label": "Mic Level: "},
             daemon=True
         )
         meter_thread.start()
 
-        # Create a monitoring wrapper for the mixer that calculates dB levels
-        # while passing frames through to the player
-        async def monitored_mixer():
-            try:
-                async for frame in mixer:
-                    # Calculate dB level for room audio
-                    samples = list(frame.data)
-                    db_level = calculate_db_level(samples)
-                    try:
-                        room_db_queue.put_nowait(db_level)
-                    except queue.Full:
-                        pass  # Drop if queue is full
-                    # Yield the frame for playback
-                    yield frame
-            except Exception:
-                pass
-
-        # Start playing mixed remote audio with monitoring
-        asyncio.create_task(player.play(monitored_mixer()))
+        # Start playing mixed remote audio (tracks added via event handlers)
+        await player.start()
 
         # Monitor microphone dB levels
         async def monitor_mic_db():
@@ -191,7 +119,6 @@ async def monitor_mic_db():
         pass
     finally:
         await mic.aclose()
-        await mixer.aclose()
         await player.aclose()
         try:
             await room.disconnect()
diff --git a/livekit-rtc/livekit/rtc/media_devices.py b/livekit-rtc/livekit/rtc/media_devices.py
@@ -25,6 +25,9 @@
 from . import AudioSource
 from .audio_frame import AudioFrame
 from .apm import AudioProcessingModule
+from .audio_mixer import AudioMixer
+from .audio_stream import AudioStream
+from .track import Track
 
 """
 Media device helpers built on top of the `sounddevice` library.
@@ -121,6 +124,10 @@ class OutputPlayer:
     When `apm_for_reverse` is provided, this player will feed the same PCM it
     renders (in 10 ms frames) into the APM reverse path so that echo
     cancellation can correlate mic input with speaker output.
+
+    The OutputPlayer includes an internal `AudioMixer` for convenient multi-track
+    playback. Use `add_track()` and `remove_track()` to dynamically manage tracks,
+    then call `start()` to begin playback.
     """
 
     def __init__(
@@ -142,6 +149,10 @@ def __init__(
         self._play_task: Optional[asyncio.Task] = None
         self._running = False
         self._delay_estimator = delay_estimator
+        
+        # Internal mixer for add_track/remove_track API
+        self._mixer: Optional[AudioMixer] = None
+        self._track_streams: dict[str, AudioStream] = {}  # track.sid -> AudioStream
 
         def _callback(outdata: np.ndarray, frame_count: int, time_info: Any, status: Any) -> None:
             # Pull PCM int16 from buffer; zero if not enough
@@ -197,31 +208,133 @@ def _callback(outdata: np.ndarray, frame_count: int, time_info: Any, status: Any
             blocksize=blocksize,
         )
 
-    async def play(self, stream: AsyncIterator[AudioFrame]) -> None:
-        """Render an async iterator of `AudioFrame` to the output device.
+    def add_track(self, track: Track) -> None:
+        """Add an audio track to the internal mixer for playback.
+
+        This creates an `AudioStream` from the track and adds it to the internal
+        mixer. The mixer is created lazily on first track addition. Call `start()`
+        to begin playback of all added tracks.
 
-        The raw PCM data is appended to an internal buffer consumed by the
-        realtime callback. If an APM was supplied, reverse frames are fed for AEC.
+        Args:
+            track: The audio track to add (typically from a remote participant).
+
+        Raises:
+            ValueError: If the track has already been added to this player.
         """
-        self._running = True
-        self._stream.start()
-        try:
-            async for frame in stream:
-                if not self._running:
-                    break
-                # Append raw PCM bytes for callback consumption
-                self._buffer.extend(frame.data.tobytes())
-        finally:
-            self._running = False
+        if track.sid in self._track_streams:
+            raise ValueError(f"Track {track.sid} already added to player")
+        
+        # Create mixer on first track addition
+        if self._mixer is None:
+            self._mixer = AudioMixer(
+                sample_rate=self._sample_rate,
+                num_channels=self._num_channels
+            )
+        
+        # Create audio stream for this track
+        stream = AudioStream(
+            track,
+            sample_rate=self._sample_rate,
+            num_channels=self._num_channels
+        )
+        
+        self._track_streams[track.sid] = stream
+        self._mixer.add_stream(stream)
+
+    async def remove_track(self, track: Track) -> None:
+        """Remove an audio track from the internal mixer.
+
+        This removes the track's stream from the mixer and closes it.
+
+        Args:
+            track: The audio track to remove.
+        """
+        stream = self._track_streams.pop(track.sid, None)
+        if stream is None:
+            return
+        
+        if self._mixer is not None:
             try:
-                self._stream.stop()
-                self._stream.close()
+                self._mixer.remove_stream(stream)
             except Exception:
                 pass
+        
+        try:
+            await stream.aclose()
+        except Exception:
+            pass
+
+    async def start(self) -> None:
+        """Start playback of all tracks in the internal mixer.
+
+        This begins a background task that consumes frames from the internal mixer
+        and sends them to the output device. Tracks can be added or removed
+        dynamically using `add_track()` and `remove_track()`.
+
+        Raises:
+            RuntimeError: If playback is already started.
+        """
+        if self._play_task is not None and not self._play_task.done():
+            raise RuntimeError("Playback already started")
+        
+        if self._mixer is None:
+            self._mixer = AudioMixer(
+                sample_rate=self._sample_rate,
+                num_channels=self._num_channels
+            )
+        
+        async def _playback_loop():
+            """Internal playback loop that consumes frames from the mixer."""
+            self._running = True
+            self._stream.start()
+            try:
+                async for frame in self._mixer:
+                    if not self._running:
+                        break
+                    # Append raw PCM bytes for callback consumption
+                    self._buffer.extend(frame.data.tobytes())
+            finally:
+                self._running = False
+                try:
+                    self._stream.stop()
+                    self._stream.close()
+                except Exception:
+                    pass
+        
+        self._play_task = asyncio.create_task(_playback_loop())
 
     async def aclose(self) -> None:
-        """Stop playback and close the output stream."""
+        """Stop playback and close the output stream.
+        
+        This also cleans up all added tracks and the internal mixer.
+        """
         self._running = False
+        
+        # Cancel playback task if running
+        if self._play_task is not None and not self._play_task.done():
+            self._play_task.cancel()
+            try:
+                await self._play_task
+            except asyncio.CancelledError:
+                pass
+        
+        # Clean up all track streams
+        for stream in list(self._track_streams.values()):
+            try:
+                await stream.aclose()
+            except Exception:
+                pass
+        self._track_streams.clear()
+        
+        # Close mixer
+        if self._mixer is not None:
+            try:
+                await self._mixer.aclose()
+            except Exception:
+                pass
+            self._mixer = None
+        
+        # Close output stream
         try:
             self._stream.stop()
             self._stream.close()