Skip to content

Commit 43e595a

Browse files
committed
feat: auto-switch video to active speaker in CurrentStream mode
When LivekitPlayer plays in CurrentStream mode, the video now follows the dominant speaker using room.ActiveSpeakers with a 1.5s debounce to prevent flickering. UserStream mode remains pinned to the specified participant. Also adds docs/cast.md documenting the full cast feature.
1 parent 66e96cc commit 43e595a

File tree

2 files changed

+282
-9
lines changed

2 files changed

+282
-9
lines changed

Explorer/Assets/DCL/SDKComponents/MediaStream/LivekitPlayer.cs

Lines changed: 76 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ public class LivekitPlayer : IDisposable
3434
private LivekitAddress? playingAddress;
3535
private Vector3 audioPosition;
3636

37+
private string? currentVideoIdentity;
38+
private float videoSwitchedAtTime;
39+
private const float MIN_SPEAKER_HOLD_SECONDS = 1.5f;
40+
3741
private bool disposed;
3842

3943
public bool MediaOpened =>
@@ -58,16 +62,22 @@ public void EnsureVideoIsPlaying()
5862
{
5963
if (State != PlayerState.PLAYING) return;
6064
if (playingAddress == null) return;
61-
if (IsVideoOpened) return;
6265

63-
// If a specific user stream died, fallback to current-stream (first available track)
64-
if (playingAddress.Value.IsUserStream(out _))
66+
if (!IsVideoOpened)
6567
{
66-
OpenMedia(LivekitAddress.CurrentStream());
68+
// If a specific user stream died, fallback to current-stream (first available track)
69+
if (playingAddress.Value.IsUserStream(out _))
70+
{
71+
OpenMedia(LivekitAddress.CurrentStream());
72+
return;
73+
}
74+
75+
OpenMedia(playingAddress.Value);
6776
return;
6877
}
6978

70-
OpenMedia(playingAddress.Value);
79+
// Video is alive — try to follow the active speaker (CurrentStream only)
80+
TryFollowActiveSpeaker();
7181
}
7282

7383
public void EnsureAudioIsPlaying()
@@ -98,17 +108,23 @@ public void OpenMedia(LivekitAddress livekitAddress)
98108
{
99109
CloseCurrentStream();
100110

111+
currentVideoIdentity = null;
112+
101113
currentVideoStream = livekitAddress.Match(
102114
this,
103115
onUserStream: static (self, userStream) =>
104-
self.room.VideoStreams.ActiveStream(new StreamKey(userStream.Identity, userStream.Sid)),
105-
onCurrentStream: static self => self.FirstVideo()
116+
{
117+
self.currentVideoIdentity = userStream.Identity;
118+
return self.room.VideoStreams.ActiveStream(new StreamKey(userStream.Identity, userStream.Sid));
119+
},
120+
onCurrentStream: static self => self.FirstVideoTrackingIdentity()
106121
);
107122

108123
OpenAllAudioStreams();
109124

110125
playerState = PlayerState.PLAYING;
111126
playingAddress = livekitAddress;
127+
videoSwitchedAtTime = UnityEngine.Time.realtimeSinceStartup;
112128
}
113129

114130
private void OpenAllAudioStreams()
@@ -151,16 +167,66 @@ private void CollectAllAudioTracks(List<StreamKey> output)
151167
}
152168
}
153169

154-
private Weak<IVideoStream> FirstVideo()
170+
private Weak<IVideoStream> FirstVideoTrackingIdentity()
155171
{
156-
var result = FirstAvailableTrackSid(TrackKind.KindVideo);
172+
StreamKey? result = FirstAvailableTrackSid(TrackKind.KindVideo);
157173

158174
if (result.HasValue == false)
175+
{
176+
currentVideoIdentity = null;
159177
return Weak<IVideoStream>.Null;
178+
}
160179

180+
currentVideoIdentity = result.Value.identity;
161181
return room.VideoStreams.ActiveStream(result.Value);
162182
}
163183

184+
/// <summary>
/// Switches the displayed video to the first participant listed in <c>room.ActiveSpeakers</c>.
/// Does nothing when the playing address is a user stream, so pinned streams never auto-switch.
/// </summary>
/// <remarks>
/// The current video is kept when: less than <see cref="MIN_SPEAKER_HOLD_SECONDS"/> seconds have
/// passed since <c>videoSwitchedAtTime</c>, nobody is speaking, the first speaker is already on
/// screen, the speaker has no video track, or the speaker's stream fails to open.
/// </remarks>
private void TryFollowActiveSpeaker()
{
    // Nothing to follow without an address; user streams stay pinned to their participant.
    if (playingAddress == null || playingAddress.Value.IsUserStream(out _)) return;

    // Read the clock once so the debounce check and the recorded switch time agree.
    float now = UnityEngine.Time.realtimeSinceStartup;

    // Debounce: hold the current video for a minimum time to prevent flickering.
    if (now - videoSwitchedAtTime < MIN_SPEAKER_HOLD_SECONDS) return;

    if (room.ActiveSpeakers.Count == 0) return;

    string? dominantSpeaker = null;

    // Only the first entry is used.
    // NOTE(review): assumes ActiveSpeakers is ordered loudest-first — confirm against the LiveKit SDK.
    foreach (string speakerIdentity in room.ActiveSpeakers)
    {
        dominantSpeaker = speakerIdentity;
        break;
    }

    if (dominantSpeaker == null) return;
    if (dominantSpeaker == currentVideoIdentity) return;

    StreamKey? videoTrack = FindVideoTrackForParticipant(dominantSpeaker);

    if (videoTrack == null) return;

    var previousStream = currentVideoStream;
    currentVideoStream = room.VideoStreams.ActiveStream(videoTrack.Value);

    // If the speaker's stream did not open, restore the previous one. Leaving a dead stream in
    // place would make the next EnsureVideoIsPlaying() call OpenMedia(), which closes the current
    // stream and releases every audio source.
    // NOTE(review): relies on IsVideoOpened reflecting currentVideoStream — confirm.
    if (!IsVideoOpened)
    {
        currentVideoStream = previousStream;
        return;
    }

    currentVideoIdentity = dominantSpeaker;
    videoSwitchedAtTime = now;
}
211+
212+
/// <summary>
/// Looks up the remote participant with the given identity and returns the key of the first
/// video track found among its tracks.
/// </summary>
/// <param name="identity">Identity of the remote participant to search.</param>
/// <returns>
/// A <see cref="StreamKey"/> built from <paramref name="identity"/> and the track sid, or
/// <c>null</c> when the participant is not found or has no video track.
/// </returns>
private StreamKey? FindVideoTrackForParticipant(string identity)
{
    StreamKey? found = null;

    // The lookup and the track enumeration both happen while holding the lock on room.Participants.
    lock (room.Participants)
    {
        var remote = room.Participants.RemoteParticipant(identity);

        if (remote != null)
        {
            foreach ((string trackSid, TrackPublication publication) in remote.Tracks)
            {
                if (publication.Kind != TrackKind.KindVideo) continue;

                // First video track wins; stop scanning.
                found = new StreamKey(identity, trackSid);
                break;
            }
        }
    }

    return found;
}
229+
164230
private StreamKey? FirstAvailableTrackSid(TrackKind kind)
165231
{
166232
// See: https://github.com/decentraland/unity-explorer/issues/3796
@@ -241,6 +307,7 @@ public void CloseCurrentStream()
241307
{
242308
// Doesn't need to dispose the stream, because it's responsibility of the owning room.
243309
currentVideoStream = null;
310+
currentVideoIdentity = null;
244311
playerState = PlayerState.STOPPED;
245312
ReleaseAllAudioSources();
246313
}

docs/cast.md

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
# Cast — LiveKit Media Streaming
2+
3+
This document explains how live video and audio streaming works in the explorer via LiveKit rooms.
4+
5+
## Overview
6+
7+
The cast feature allows scenes to display live video/audio streams from LiveKit rooms. A scene places a `PBVideoPlayer` or `PBAudioStream` SDK component on an entity with a `livekit-video://` URL, and the explorer connects to the room and routes media to the in-world screen.
8+
9+
Two player backends exist side by side:
10+
11+
| Backend | URL scheme | Use case |
12+
|---------|-----------|----------|
13+
| **AvProPlayer** | `http://`, `https://` | Pre-recorded or HLS video |
14+
| **LivekitPlayer** | `livekit-video://` | Real-time room streams |
15+
16+
The `MultiMediaPlayer` REnum wraps both behind a unified interface so the ECS systems don't care which backend is active.
17+
18+
---
19+
20+
## Address Types — `LivekitAddress`
21+
22+
`LivekitAddress` is an REnum (discriminated union) with two variants:
23+
24+
### CurrentStream
25+
26+
```
27+
livekit-video://current-stream
28+
```
29+
30+
Picks the first available video track in the room — and then **follows the active speaker** (see [Active Speaker Tracking](#active-speaker-tracking-video-follows-voice) below). This is the default mode for streaming theatre screens.
31+
32+
### UserStream
33+
34+
```
35+
livekit-video://{identity}/{sid}
36+
```
37+
38+
Pins to a specific participant's track by identity and stream ID. No automatic switching occurs.
39+
40+
Defined in `LivekitAddress.cs`. Helper extensions in `LiveKitMediaExtensions.cs` handle parsing.
41+
42+
---
43+
44+
## Video Routing
45+
46+
### How the first video track is selected
47+
48+
When `OpenMedia()` is called:
49+
50+
- **CurrentStream** → `FirstVideoTrackingIdentity()` iterates all remote participants (under lock) and returns the first video track found. The participant's identity is stored in `currentVideoIdentity`.
51+
- **UserStream** → Directly opens the stream for the specified `(identity, sid)`.
52+
53+
### Active Speaker Tracking (video-follows-voice)
54+
55+
In `CurrentStream` mode, the video automatically switches to whoever is speaking. This is driven by `TryFollowActiveSpeaker()`, which `EnsureVideoIsPlaying()` calls on every frame where the player is in the `PLAYING` state and the current video stream is alive.
56+
57+
**How it works:**
58+
59+
1. `room.ActiveSpeakers` (provided by the LiveKit SDK) is an ordered collection of participant identities currently speaking — first element = highest audio level.
60+
2. Each frame, `TryFollowActiveSpeaker()` reads the dominant speaker.
61+
3. If the dominant speaker differs from the current video identity **and** enough time has passed since the last switch, the video stream is swapped.
62+
63+
**Debounce:** A minimum hold time of **1.5 seconds** (`MIN_SPEAKER_HOLD_SECONDS`) prevents flickering during rapid speaker changes. The hold timer restarts on every speaker switch and on every `OpenMedia()` call.
64+
65+
**Fallback rules:**
66+
67+
| Scenario | Behavior |
68+
|----------|----------|
69+
| Active speaker has no video track | Keep current video |
70+
| No one is speaking | Keep current video |
71+
| Rapid speaker changes (<1.5s) | Debounced — stays on current |
72+
| UserStream mode | No auto-switching (early return) |
73+
74+
**Key methods in `LivekitPlayer.cs`:**
75+
76+
- `FirstVideoTrackingIdentity()` — Selects first video track and records identity
77+
- `TryFollowActiveSpeaker()` — Core speaker-tracking logic with debounce
78+
- `FindVideoTrackForParticipant(identity)` — Looks up a participant's video track by identity
79+
80+
---
81+
82+
## Audio Routing
83+
84+
Audio is handled independently from video.
85+
86+
### All tracks play simultaneously
87+
88+
`OpenAllAudioStreams()` iterates **every remote participant** in the room and opens **every audio track** it finds. Each track gets its own pooled `LivekitAudioSource` from a `ThreadSafeObjectPool`. This means:
89+
90+
- All participants' microphones are heard at once (like a conference call).
91+
- Audio is **not** tied to the currently displayed video — you always hear everyone.
92+
- Volume and spatial positioning are applied uniformly to all sources.
93+
94+
### Spatial audio
95+
96+
When the SDK component has `spatial = true`, audio sources are positioned in 3D space via `PlaceAudioAt(position)`. Min/max distance is configured through the SDK component fields.
97+
98+
### Paired audio (reserved)
99+
100+
`FindPairedAudio()` maps a video track to its companion audio track (camera → microphone, screenshare → screenshare audio). This exists for future use but is not currently active — all audio plays regardless.
101+
102+
---
103+
104+
## Stream Recovery (Self-Healing)
105+
106+
Both video and audio streams can die at any time (participant disconnects, network issues). The system self-heals via two methods called every frame from `UpdateMediaPlayerSystem`:
107+
108+
### `EnsureVideoIsPlaying()`
109+
110+
```
111+
Video dead + UserStream mode → Fallback to CurrentStream (first available track)
112+
Video dead + CurrentStream mode → Re-open CurrentStream
113+
Video alive + CurrentStream mode → TryFollowActiveSpeaker()
114+
```
115+
116+
### `EnsureAudioIsPlaying()`
117+
118+
```
119+
Any audio source dead → Release all, re-collect all audio tracks
120+
All audio alive → No action
121+
```
122+
123+
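This means that a participant who leaves and rejoins, or a newly joined participant, is picked up automatically on the next recovery cycle. Per the rules above, a recovery cycle only runs when an existing audio source has died; while every source stays alive, no re-collection happens.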
124+
125+
---
126+
127+
## System Architecture
128+
129+
### ECS Systems
130+
131+
| System | Group | Responsibility |
132+
|--------|-------|---------------|
133+
| `CreateMediaPlayerSystem` | ComponentInstantiation | Detects new `PBVideoPlayer`/`PBAudioStream` components, creates `MediaPlayerComponent` with appropriate backend |
134+
| `UpdateMediaPlayerSystem` | SyncedPresentation | Drives playback each frame — calls `EnsureVideoIsPlaying()`, `EnsureAudioIsPlaying()`, handles volume crossfading, texture updates |
135+
| `CleanUpMediaPlayerSystem` | CleanUp | Disposes players when entities/components are removed |
136+
137+
### Factory
138+
139+
`MediaFactory` (built by `MediaFactoryBuilder` per scene) decides which backend to create based on the URL scheme. It holds a reference to the scene's `IRoom` from `IRoomHub`.
140+
141+
### Component
142+
143+
`MediaPlayerComponent` wraps a `MultiMediaPlayer` (which is either `AvProPlayer` or `LivekitPlayer`). It also tracks frozen-stream detection and audio visualization buffers.
144+
145+
---
146+
147+
## SDK Integration
148+
149+
### How a scene triggers streaming
150+
151+
1. Scene SDK sends a `PBVideoPlayer` component with `src = "livekit-video://current-stream"` (or a specific user address).
152+
2. `CreateMediaPlayerSystem` picks it up, calls `MediaAddress.New()` which detects the `livekit-video://` prefix.
153+
3. `MediaFactory` creates a `LivekitPlayer` backed by the scene's LiveKit room.
154+
4. `UpdateMediaPlayerSystem` drives it every frame.
155+
156+
### `getActiveVideoStreams` API
157+
158+
Scenes can query available streams via `CommsApiWrap.GetActiveVideoStreams()`. The response includes:
159+
160+
```json
161+
{
162+
"streams": [
163+
{
164+
"identity": "participant-id",
165+
"trackSid": "livekit-video://identity/sid",
166+
"sourceType": "VTST_CAMERA",
167+
"name": "Display Name",
168+
"speaking": true,
169+
"trackName": "video",
170+
"width": 1920,
171+
"height": 1080
172+
}
173+
]
174+
}
175+
```
176+
177+
A synthetic `current-stream` entry is always included, pointing to the first available participant.
178+
179+
### CastV2 — Display Name Resolution
180+
181+
Participants joining via castV2 (unauthenticated web viewers) may not have a `Name` field. Display name is resolved with this fallback chain:
182+
183+
```
184+
Participant.Metadata.displayName → Participant.Name → Participant.Identity
185+
```
186+
187+
Metadata is a JSON string parsed at query time.
188+
189+
---
190+
191+
## Key Files
192+
193+
| File | Role |
194+
|------|------|
195+
| `SDKComponents/MediaStream/LivekitPlayer.cs` | Core player — video/audio routing, speaker tracking, recovery |
196+
| `SDKComponents/MediaStream/LivekitAddress.cs` | `CurrentStream` / `UserStream` address REnum |
197+
| `SDKComponents/MediaStream/MultiMediaPlayer.cs` | Unified wrapper over AvPro and Livekit backends |
198+
| `SDKComponents/MediaStream/MediaPlayerComponent.cs` | ECS component holding the player |
199+
| `SDKComponents/MediaStream/Systems/UpdateMediaPlayerSystem.cs` | Per-frame system driving playback |
200+
| `SDKComponents/MediaStream/Systems/CreateMediaPlayerSystem.cs` | System creating players from SDK components |
201+
| `SDKComponents/MediaStream/Systems/CleanUpMediaPlayerSystem.cs` | Disposal system |
202+
| `SDKComponents/MediaStream/MediaFactory.cs` | Factory choosing backend by URL |
203+
| `SDKComponents/MediaStream/LiveKitMediaExtensions.cs` | URL parsing helpers |
204+
| `Infrastructure/.../CommsApi/CommsApiWrap.cs` | `getActiveVideoStreams` API |
205+
| `Infrastructure/.../CommsApi/GetActiveVideoStreamsResponse.cs` | Response builder with display name resolution |
206+
| `Multiplayer/Connections/Rooms/ParticipantExtensions.cs` | Address construction from participants |

0 commit comments

Comments
 (0)