55# LICENSE file in the root directory of this source tree.
66
77"""
8- ========================================================================
9- Decoding with custom_frame_mappings: Performance and accuracy comparison
10- ========================================================================
8+ ====================================
9+ Decoding with custom frame mappings
10+ ====================================
1111
1212In this example, we will describe the ``custom_frame_mappings`` parameter of the
1313:class:`~torchcodec.decoders.VideoDecoder` class.
14-
1514This parameter allows you to provide pre-computed frame mapping information to
1615speed up :class:`~torchcodec.decoders.VideoDecoder` instantiation, while
1716maintaining the frame seeking accuracy of ``seek_mode="exact"``.
1817
1918This makes it ideal for workflows where:
20- 1. accuracy is critical, so ``seek_mode="approximate"`` cannot be used
21- 2. the videos can be preprocessed once and then decoded many times.
19+
20+ 1. Frame accuracy is critical, so :doc:`approximate mode <approximate_mode>` cannot be used
21+ 2. Videos can be preprocessed once and then decoded many times
2222"""
2323
2424# %%
25- # First, let's set up our test videos: we'll download a short video and
26- # use ffmpeg to create a longer version by repeating it multiple times.
25+ # First, some boilerplate: we'll download a short video from the web, and
26+ # use ffmpeg to create a longer version by repeating it multiple times. We'll end up
27+ # with two videos: a short one of approximately 3 minutes and a long one of about 13 minutes.
28+ # You can skip this part and jump straight to :ref:`frame_mappings_creation`.
2729
2830import tempfile
2931from pathlib import Path
6264# -------------------------------------------
6365#
6466# The key to using custom frame mappings is preprocessing your videos to extract
65- # frame timing information, and whether or not a frame is a keyframe information.
66- # We use ffprobe to generate JSON files containing this metadata.
67+ # frame timing information and keyframe indicators. We use ffprobe to generate
68+ # JSON files containing this metadata.
6769
6870from pathlib import Path
6971import subprocess
7577long_json_path = Path (temp_dir ) / "long_custom_frame_mappings.json"
7678short_json_path = Path (temp_dir ) / "short_custom_frame_mappings.json"
7779
78- ffprobe_cmd = ["ffprobe" , "-i" , f"{ long_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pts,duration ,key_frame" , "-of" , "json" ]
80+ ffprobe_cmd = ["ffprobe" , "-i" , f"{ long_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pkt_pts,pkt_duration ,key_frame" , "-of" , "json" ]
7981ffprobe_result = subprocess .run (ffprobe_cmd , check = True , capture_output = True , text = True )
8082with open (long_json_path , "w" ) as f :
8183 f .write (ffprobe_result .stdout )
8284 print (f"Wrote { len (ffprobe_result .stdout )} characters to { long_json_path } " )
8385
84- ffprobe_cmd = ["ffprobe" , "-i" , f"{ short_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pts,duration ,key_frame" , "-of" , "json" ]
86+ ffprobe_cmd = ["ffprobe" , "-i" , f"{ short_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pkt_pts,pkt_duration ,key_frame" , "-of" , "json" ]
8587ffprobe_result = subprocess .run (ffprobe_cmd , check = True , capture_output = True , text = True )
8688with open (short_json_path , "w" ) as f :
8789 f .write (ffprobe_result .stdout )
9092# %%
9193# .. _perf_creation:
9294#
93- # Performance: ``VideoDecoder`` creation with custom frame mappings
94- # -----------------------------------------------------------------
95+ # Performance: ``VideoDecoder`` creation
96+ # --------------------------------------
9597#
96- # Let's define a benchmarking function to measure performance. Note that when using
97- # file-like objects for custom_frame_mappings, we need to seek back to the beginning
98- # between iterations since the JSON data is consumed during VideoDecoder creation.
98+ # In terms of performance, custom frame mappings ultimately affect the
99+ # **creation** of a :class:`~torchcodec.decoders.VideoDecoder` object. The
100+ # longer the video, the higher the performance gain.
101+ # Let's define a benchmarking function to measure performance.
102+ # Note that when passing a file-like object as ``custom_frame_mappings``, we need to
103+ # seek back to the beginning of the file between iterations, since the JSON data
104+ # is consumed during ``VideoDecoder`` creation.
99105
100106import torch
101107
@@ -141,9 +147,10 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
141147# Performance: Frame decoding with custom frame mappings
142148# ------------------------------------------------------
143149#
144- # The performance benefits extend to frame decoding operations as well, since
145- # each decoding workflow typically involves creating a VideoDecoder instance.
146- # Let's compare frame decoding performance between the two approaches.
150+ # Although the ``custom_frame_mappings`` parameter only affects the performance of
151+ # :class:`~torchcodec.decoders.VideoDecoder` creation, decoding workflows
152+ # typically involve creating a :class:`~torchcodec.decoders.VideoDecoder` instance.
153+ # As a result, the faster creation also shows up in end-to-end decoding time.
147154
148155
149156def decode_frames (video_path , seek_mode = "exact" , custom_frame_mappings = None ):
@@ -165,25 +172,22 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
165172 bench (decode_frames , video_path = video_path , seek_mode = "exact" )
166173
167174# %%
168- # Accuracy: High accuracy frame seeking with custom frame mappings
169- # ----------------------------------------------------------------
175+ # Accuracy: Metadata and frame retrieval
176+ # --------------------------------------
170177#
171- # The main advantage of using custom frame mappings over approximate mode is that
172- # frame seeking accuracy is as high as exact mode.
173-
174- video_path = long_video_path
175- json_path = long_json_path
176- with open (json_path , "r" ) as f :
177- custom_frame_mappings_decoder = VideoDecoder (
178- source = video_path ,
179- custom_frame_mappings = f ,
180- stream_index = 0
181- )
182-
183- exact_decoder = VideoDecoder (video_path , seek_mode = "exact" , stream_index = 0 )
184- approx_decoder = VideoDecoder (video_path , seek_mode = "approximate" , stream_index = 0 )
185-
186- print ("Comparing frames between exact seek mode decoder and custom_frame_mappings decoder:" )
178+ # We've seen that using custom frame mappings can significantly speed up
179+ # :class:`~torchcodec.decoders.VideoDecoder` creation. The key advantage over
180+ # approximate mode is that seeking remains as accurate as with ``seek_mode="exact"``.
181+
182+ print ("Metadata of short video with custom_frame_mappings:" )
183+ with open (short_json_path , "r" ) as f :
184+ print (VideoDecoder (short_video_path , custom_frame_mappings = f ).metadata )
185+ print ("Metadata of short video with seek_mode='exact':" )
186+ print (VideoDecoder (short_video_path , seek_mode = "exact" ).metadata )
187+
188+ with open (short_json_path , "r" ) as f :
189+ custom_frame_mappings_decoder = VideoDecoder (short_video_path , custom_frame_mappings = f )
190+ exact_decoder = VideoDecoder (short_video_path , seek_mode = "exact" )
187191for i in range (len (exact_decoder )):
188192 torch .testing .assert_close (
189193 exact_decoder .get_frame_at (i ).data ,
@@ -203,10 +207,11 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
203207# as a JSON, it eliminates the need for the expensive scan while preserving all the
204208# accuracy benefits.
205209#
206- # Which approach should I use?
207- # ----------------------------
210+ # Which mode should I use?
211+ # ------------------------
208212#
209- # - For fastest decoding, "approximate" mode is strongly recommended.
213+ # - For fastest decoding when speed is more important than exact seeking accuracy,
214+ # "approximate" mode is recommended.
210215#
211216# - For exact frame seeking, custom frame mappings will benefit workflows where the
212217# same videos are decoded repeatedly, and some preprocessing work can be done.
0 commit comments