@@ -56,31 +56,38 @@ def __init__(self, media_file: Union[str, Path]) -> None:
5656 def info (self ) -> MediaInfo :
5757 return self ._info
5858
async def stream_video(self) -> AsyncIterable[tuple[rtc.VideoFrame, float]]:
    """Yield ``(frame, timestamp)`` pairs decoded from the first video stream.

    This makes a single pass over ``self._video_container``; the generator
    finishes once the decoder has no more frames.

    Yields:
        A 2-tuple of the RGBA ``rtc.VideoFrame`` built from the decoded
        picture and ``av_frame.time`` as reported by the decoder
        (presumably seconds, and possibly ``None`` when the frame carries
        no pts -- confirm against the PyAV documentation).
    """
    for av_frame in self._video_container.decode(video=0):
        # Convert the decoded picture to an (H, W, 3) RGB array, then widen
        # it to four channels because the frame is published as RGBA.
        rgb = av_frame.to_rgb().to_ndarray()
        height, width = rgb.shape[0], rgb.shape[1]
        # NOTE(review): np.ones leaves the alpha channel at 1 rather than 255;
        # kept as-is to preserve the emitted bytes -- confirm this is intended.
        rgba = np.ones((height, width, 4), dtype=np.uint8)
        rgba[:, :, :3] = rgb
        yield (
            rtc.VideoFrame(
                width=width,
                height=height,
                type=rtc.VideoBufferType.RGBA,
                data=rgba.tobytes(),
            ),
            av_frame.time,
        )
7275
async def stream_audio(self) -> AsyncIterable[tuple[rtc.AudioFrame, float]]:
    """Yield ``(frame, timestamp)`` pairs decoded from the first audio stream.

    This makes a single pass over ``self._audio_container``; the generator
    finishes once the decoder has no more frames.

    Yields:
        A 2-tuple of the int16 ``rtc.AudioFrame`` and the time at which that
        frame *ends*, i.e. ``av_frame.time`` plus the frame's duration
        computed from its sample count and ``self.info.audio_sample_rate``.
    """
    for av_frame in self._audio_container.decode(audio=0):
        sample_rate = self.info.audio_sample_rate
        # Transpose to (samples, channels).
        # NOTE(review): assumes the decoder returns planar float samples in
        # [-1.0, 1.0] -- confirm for the media formats in use.
        samples = av_frame.to_ndarray().T
        # Scale to the int16 range and clip before casting: a full-scale
        # sample of +1.0 maps to 32768, which does not fit in int16 and
        # would otherwise overflow on the cast.
        pcm = np.clip(samples * 32768, -32768, 32767).astype(np.int16)
        duration = len(pcm) / sample_rate
        yield (
            rtc.AudioFrame(
                data=pcm.tobytes(),
                sample_rate=sample_rate,
                num_channels=pcm.shape[1],
                samples_per_channel=pcm.shape[0],
            ),
            av_frame.time + duration,
        )
8592
8693 def reset (self ):
@@ -102,6 +109,7 @@ async def main(room: rtc.Room, room_name: str, media_path: str):
102109 api .VideoGrants (
103110 room_join = True ,
104111 room = room_name ,
112+ agent = True ,
105113 )
106114 )
107115 .to_jwt ()
@@ -121,7 +129,7 @@ async def main(room: rtc.Room, room_name: str, media_path: str):
121129 media_info = streamer .info
122130
123131 # Create video and audio sources/tracks
124- queue_size_ms = 1000 # 1 second
132+ queue_size_ms = 1000
125133 video_source = rtc .VideoSource (
126134 width = media_info .video_width ,
127135 height = media_info .video_height ,
@@ -157,26 +165,54 @@ async def main(room: rtc.Room, room_name: str, media_path: str):
157165 )
158166
async def _push_frames(
    stream: AsyncIterable[tuple[rtc.VideoFrame | rtc.AudioFrame, float]],
    av_sync: rtc.AVSynchronizer,
):
    """Forward every ``(frame, timestamp)`` pair from ``stream`` to ``av_sync``.

    Args:
        stream: Async iterable producing a media frame together with its
            timestamp.
        av_sync: Synchronizer that receives each pair via ``push``.
    """
    async for media_frame, frame_time in stream:
        await av_sync.push(media_frame, frame_time)
        # A zero-length sleep hands control back to the event loop after
        # every frame so other tasks get a turn.
        await asyncio.sleep(0)
166174
async def _log_fps(av_sync: rtc.AVSynchronizer):
    """Log the synchronizer's frame rate and A/V timing every two seconds.

    Each report contains ``av_sync.actual_fps``, the event-loop time elapsed
    since this coroutine started, the last video and audio times exposed by
    the synchronizer, and the difference between them. The loop has no exit
    condition of its own, so it runs until the task is cancelled.
    """
    loop = asyncio.get_running_loop()
    started_at = loop.time()
    while True:
        await asyncio.sleep(2)
        elapsed = loop.time() - started_at
        video_time = av_sync.last_video_time
        audio_time = av_sync.last_audio_time
        # Positive when video is ahead of audio.
        av_gap = video_time - audio_time
        logger.info(
            f"fps: {av_sync.actual_fps:.2f}, wall_time: {elapsed:.3f}s, "
            f"video_time: {video_time:.3f}s, "
            f"audio_time: {audio_time:.3f}s, diff: {av_gap:.3f}s"
        )
186+
167187 try :
168188 while True :
169189 streamer .reset ()
170- video_task = asyncio .create_task (
171- _push_frames (streamer .stream_video (), av_sync )
172- )
173- audio_task = asyncio .create_task (
174- _push_frames (streamer .stream_audio (), av_sync )
190+
191+ video_stream = streamer .stream_video ()
192+ audio_stream = streamer .stream_audio ()
193+
194+ # read the head frames and push them at the same time
195+ first_video_frame , video_timestamp = await video_stream .__anext__ ()
196+ first_audio_frame , audio_timestamp = await audio_stream .__anext__ ()
197+ logger .info (
198+ f"first video duration: { 1 / media_info .video_fps :.3f} s, "
199+ f"first audio duration: { first_audio_frame .duration :.3f} s"
175200 )
201+ await av_sync .push (first_video_frame , video_timestamp )
202+ await av_sync .push (first_audio_frame , audio_timestamp )
203+
204+ video_task = asyncio .create_task (_push_frames (video_stream , av_sync ))
205+ audio_task = asyncio .create_task (_push_frames (audio_stream , av_sync ))
206+
207+ log_fps_task = asyncio .create_task (_log_fps (av_sync ))
176208
177209 # wait for both tasks to complete
178210 await asyncio .gather (video_task , audio_task )
179211 await av_sync .wait_for_playout ()
212+
213+ # clean up
214+ av_sync .reset ()
215+ log_fps_task .cancel ()
180216 logger .info ("playout finished" )
181217 finally :
182218 await streamer .aclose ()
0 commit comments