1111
1212In this example, we will describe the ``custom_frame_mappings`` parameter of the
1313:class:`~torchcodec.decoders.VideoDecoder` class.
14+
15+ This parameter allows you to provide pre-computed frame mapping information to
16+ speed up :class:`~torchcodec.decoders.VideoDecoder` instantiation, while
17+ maintaining the frame seeking accuracy of ``seek_mode="exact"``.
18+
19+ This makes it ideal for workflows where:
20+ 1. accuracy is critical, so ``seek_mode="approximate"`` cannot be used
21+ 2. the videos can be preprocessed once and then decoded many times.
1422"""
1523
1624# %%
17- # Create an HD video using ffmpeg and use the ffmpeg CLI to repeat it 10 times
18- # to get two videos: a short video of approximately 30 seconds and a long one of about 10 mins .
25+ # First, let's set up our test videos: we'll download a short video and
26+ # use ffmpeg to create a longer version by repeating it multiple times .
1927
2028import tempfile
2129from pathlib import Path
2230import subprocess
23- from torchcodec .decoders import VideoDecoder
31+ import requests
32+
33+ url = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4.mp4"
34+ response = requests .get (url , headers = {"User-Agent" : "" })
35+ if response .status_code != 200 :
36+ raise RuntimeError (f"Failed to download video. { response .status_code = } ." )
2437
2538temp_dir = tempfile .mkdtemp ()
2639short_video_path = Path (temp_dir ) / "short_video.mp4"
27-
28- ffmpeg_generate_video_command = [
29- "ffmpeg" ,
30- "-y" ,
31- "-f" , "lavfi" ,
32- "-i" , "mandelbrot=s=1920x1080" ,
33- "-t" , "30" ,
34- "-c:v" , "h264" ,
35- "-r" , "60" ,
36- "-g" , "600" ,
37- "-pix_fmt" , "yuv420p" ,
38- f"{ short_video_path } "
39- ]
40- subprocess .run (ffmpeg_generate_video_command )
40+ with open (short_video_path , 'wb' ) as f :
41+ for chunk in response .iter_content ():
42+ f .write (chunk )
4143
4244long_video_path = Path (temp_dir ) / "long_video.mp4"
4345ffmpeg_command = [
4446 "ffmpeg" ,
45- "-stream_loop" , "20 " , # repeat video 20 times to get a 10 min video
47+ "-stream_loop" , "3 " , # repeat video 3 times to get a ~13 min video
4648 "-i" , f"{ short_video_path } " ,
4749 "-c" , "copy" ,
4850 f"{ long_video_path } "
4951]
50- subprocess .run (ffmpeg_command )
52+ subprocess .run (ffmpeg_command , check = True , stdout = subprocess . PIPE , stderr = subprocess . PIPE )
5153
54+ from torchcodec .decoders import VideoDecoder
5255print (f"Short video duration: { VideoDecoder (short_video_path ).metadata .duration_seconds } seconds" )
5356print (f"Long video duration: { VideoDecoder (long_video_path ).metadata .duration_seconds / 60 } minutes" )
5457
5558# %%
56- # Preprocessing step to create frame mappings for the videos using ffprobe.
59+ # .. _frame_mappings_creation:
60+ #
61+ # Creating custom frame mappings with ffprobe
62+ # --------------------------------------------
63+ #
64+ # The key to using custom frame mappings is preprocessing your videos to extract
65+ # each frame's timing information and whether or not that frame is a keyframe.
66+ # We use ffprobe to generate JSON files containing this metadata.
5767
5868from pathlib import Path
5969import subprocess
6272
6373stream_index = 0
6474
65- temp_dir = tempfile .mkdtemp ()
6675long_json_path = Path (temp_dir ) / "long_custom_frame_mappings.json"
6776short_json_path = Path (temp_dir ) / "short_custom_frame_mappings.json"
6877
7988 print (f"Wrote { len (ffprobe_result .stdout )} characters to { short_json_path } " )
8089
8190# %%
82- # Define benchmarking function. When a file_like object is passed in, its necessary to seek
83- # to the beginning of the file before reading it in the next iteration.
91+ # .. _perf_creation:
92+ #
93+ # Performance: ``VideoDecoder`` creation with custom frame mappings
94+ # ------------------------------------------------------------------
95+ #
96+ # Let's define a benchmarking function to measure performance. Note that when using
97+ # file-like objects for ``custom_frame_mappings``, we need to seek back to the beginning
98+ # between iterations since the JSON data is consumed during ``VideoDecoder`` creation.
8499
85100import torch
86101
@@ -106,16 +121,15 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
106121 print (f"{ med = :.2f} ms +- { std :.2f} " )
107122
108123# %%
109- # Compare performance of initializing VideoDecoder with custom_frame_mappings vs exact seek_mode
124+ # Now let's compare the performance of creating VideoDecoder objects with custom
125+ # frame mappings versus the exact seek mode. You'll see that custom
126+ # frame mappings provide significant speedups, especially for longer videos.
110127
111128
112129for video_path , json_path in ((short_video_path , short_json_path ), (long_video_path , long_json_path )):
113130 print (f"Running benchmarks on { Path (video_path ).name } " )
114- print ("Creating a VideoDecoder object with custom_frame_mappings JSON str from file:" )
115- with open (json_path , "r" ) as f :
116- bench (VideoDecoder , source = video_path , stream_index = stream_index , custom_frame_mappings = (f .read ()))
117131
118- print ("Creating a VideoDecoder object with custom_frame_mappings from filelike :" )
132+ print ("Creating a VideoDecoder object with custom_frame_mappings:" )
119133 with open (json_path , "r" ) as f :
120134 bench (VideoDecoder , file_like = True , source = video_path , stream_index = stream_index , custom_frame_mappings = f )
121135
@@ -124,7 +138,12 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
124138 bench (VideoDecoder , source = video_path , stream_index = stream_index , seek_mode = "exact" )
125139
126140# %%
127- # Decode frames with custom_frame_mappings vs exact seek_mode
141+ # Performance: Frame decoding with custom frame mappings
142+ # --------------------------------------------------------
143+ #
144+ # The performance benefits extend to frame decoding operations as well, since
145+ # each decoding workflow typically involves creating a VideoDecoder instance.
146+ # Let's compare frame decoding performance between the two approaches.
128147
129148
130149def decode_frames (video_path , seek_mode = "exact" , custom_frame_mappings = None ):
@@ -140,38 +159,59 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
140159 print (f"Running benchmarks on { Path (video_path ).name } " )
141160 print ("Decoding frames with custom_frame_mappings JSON str from file:" )
142161 with open (json_path , "r" ) as f :
143- bench (decode_frames , video_path = video_path , custom_frame_mappings = ( f . read ()) )
162+ bench (decode_frames , file_like = True , video_path = video_path , custom_frame_mappings = f )
144163
145164 print ("Decoding frames with seek_mode='exact':" )
146165 bench (decode_frames , video_path = video_path , seek_mode = "exact" )
147166
148167# %%
149- # Compare frame accuracy with custom_frame_mappings vs exact seek_mode
150- video_path = short_video_path
151- json_path = short_json_path
168+ # Accuracy: High accuracy frame seeking with custom frame mappings
169+ # ------------------------------------------------------------------
170+ #
171+ # The main advantage of using custom frame mappings over approximate mode is that
172+ # frame seeking is just as accurate as in exact mode.
173+
174+ video_path = long_video_path
175+ json_path = long_json_path
152176with open (json_path , "r" ) as f :
153- custom_frame_mappings = f .read ()
154177 custom_frame_mappings_decoder = VideoDecoder (
155178 source = video_path ,
156- custom_frame_mappings = custom_frame_mappings
179+ custom_frame_mappings = f ,
180+ stream_index = 0
157181 )
158182
159- exact_decoder = VideoDecoder (short_video_path , seek_mode = "exact" )
160- approx_decoder = VideoDecoder (short_video_path , seek_mode = "approximate" )
183+ exact_decoder = VideoDecoder (video_path , seek_mode = "exact" , stream_index = 0 )
184+ approx_decoder = VideoDecoder (video_path , seek_mode = "approximate" , stream_index = 0 )
161185
162- print ("Metadata of short video with custom_frame_mappings:" )
163- print (custom_frame_mappings_decoder .metadata )
164- print ("Metadata of short video with seek_mode='exact':" )
165- print (exact_decoder .metadata )
166- print ("Metadata of short video with seek_mode='approximate':" )
167- print (approx_decoder .metadata )
168-
169- for i in range (len (approx_decoder )):
186+ print ("Comparing frames between exact seek mode decoder and custom_frame_mappings decoder:" )
187+ for i in range (len (exact_decoder )):
170188 torch .testing .assert_close (
171- approx_decoder .get_frame_at (i ).data ,
189+ exact_decoder .get_frame_at (i ).data ,
172190 custom_frame_mappings_decoder .get_frame_at (i ).data ,
173191 atol = 0 , rtol = 0 ,
174192 )
175193print ("Frame seeking is the same for this video!" )
176194
177195# %%
196+ # How do custom_frame_mappings help?
197+ # ----------------------------------
198+ #
199+ # Custom frame mappings contain the same frame index information
200+ # that would normally be computed during the :term:`scan` operation in exact mode:
201+ # frame presentation timestamps (PTS), durations, and keyframe indicators.
202+ # Providing this information to the :class:`~torchcodec.decoders.VideoDecoder`
203+ # as JSON eliminates the need for the expensive scan while preserving all the
204+ # accuracy benefits.
205+ #
206+ # Which approach should I use?
207+ # -----------------------------
208+ #
209+ # - For fastest decoding, "approximate" mode is strongly recommended.
210+ #
211+ # - For exact frame seeking, custom frame mappings will benefit workflows where the
212+ # same videos are decoded repeatedly, and some preprocessing work can be done.
213+ #
214+ # - For exact frame seeking without preprocessing, use "exact" mode.
215+ #
216+
217+ # %%
0 commit comments