2525# First, some boilerplate: we'll download a short video from the web, and
2626# use ffmpeg to create a longer version by repeating it multiple times. We'll end up
2727# with two videos: a short one of approximately 3 minutes and a long one of about 13 minutes.
28- # You can ignore that part and jump right below to :ref:`frame_mappings_creation`.
28+ # You can ignore this part and skip below to :ref:`frame_mappings_creation`.
2929
3030import tempfile
3131from pathlib import Path
3232import subprocess
3333import requests
3434
35- url = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4 .mp4"
35+ url = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small .mp4"
3636response = requests .get (url , headers = {"User-Agent" : "" })
3737if response .status_code != 200 :
3838 raise RuntimeError (f"Failed to download video. { response .status_code = } ." )
6363# Creating custom frame mappings with ffprobe
6464# -------------------------------------------
6565#
66- # The key to using custom frame mappings is preprocessing your videos to extract
67- # frame timing information and keyframe indicators. We use ffprobe to generate
68- # JSON files containing this metadata.
66+ # To generate JSON files containing the required video metadata, we recommend using ffprobe.
67+ # The following frame metadata fields are required
68+ # (older versions of FFmpeg use the ``pkt_`` prefix):
69+ #
70+ # - ``pts`` / ``pkt_pts``: Presentation timestamps for each frame
71+ # - ``duration`` / ``pkt_duration``: Duration of each frame
72+ # - ``key_frame``: Boolean indicating whether the frame is a key frame
6973
7074from pathlib import Path
7175import subprocess
7276import tempfile
7377from time import perf_counter_ns
78+ import json
7479
7580stream_index = 0
76-
7781long_json_path = Path (temp_dir ) / "long_custom_frame_mappings.json"
7882short_json_path = Path (temp_dir ) / "short_custom_frame_mappings.json"
7983
8084ffprobe_cmd = ["ffprobe" , "-i" , f"{ long_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pkt_pts,pkt_duration,key_frame" , "-of" , "json" ]
8185ffprobe_result = subprocess .run (ffprobe_cmd , check = True , capture_output = True , text = True )
8286with open (long_json_path , "w" ) as f :
8387 f .write (ffprobe_result .stdout )
84- print (f"Wrote { len (ffprobe_result .stdout )} characters to { long_json_path } " )
8588
8689ffprobe_cmd = ["ffprobe" , "-i" , f"{ short_video_path } " , "-select_streams" , f"{ stream_index } " , "-show_frames" , "-show_entries" , "frame=pkt_pts,pkt_duration,key_frame" , "-of" , "json" ]
8790ffprobe_result = subprocess .run (ffprobe_cmd , check = True , capture_output = True , text = True )
8891with open (short_json_path , "w" ) as f :
8992 f .write (ffprobe_result .stdout )
90- print (f"Wrote { len (ffprobe_result .stdout )} characters to { short_json_path } " )
93+
94+ sample_data = json .loads (ffprobe_result .stdout )
95+ print ("Data structure of custom frame mappings:" )
96+ for frame in sample_data ["frames" ][:3 ]:
97+ print (f"{ frame } " )
9198
9299# %%
93- # .. _perf_creation :
100+ # .. _custom_frame_mappings_perf_creation :
94101#
95102# Performance: ``VideoDecoder`` creation
96103# --------------------------------------
97104#
98- # In terms of performance, custom frame mappings ultimately affect the
99- # **creation** of a :class:`~torchcodec.decoders.VideoDecoder` object. The
100- # longer the video, the higher the performance gain.
101- # Let's define a benchmarking function to measure performance.
102- # Note that when using file-like objects for custom_frame_mappings, we need to
103- # seek back to the beginning between iterations since the JSON data is consumed
104- # during VideoDecoder creation.
105+ # Custom frame mappings affect the **creation** of a :class:`~torchcodec.decoders.VideoDecoder`
106+ # object. The longer the video, the larger the performance gain over exact mode.
107+ #
105108
106109import torch
107110
@@ -126,31 +129,26 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
126129 med = times .median ().item ()
127130 print (f"{ med = :.2f} ms +- { std :.2f} " )
128131
129- # %%
130- # Now let's compare the performance of creating VideoDecoder objects with custom
131- # frame mappings versus the exact seek mode. You'll see that custom
132- # frame mappings provide significant speedups, especially for longer videos.
133-
134132
135133for video_path , json_path in ((short_video_path , short_json_path ), (long_video_path , long_json_path )):
136- print (f"Running benchmarks on { Path (video_path ).name } " )
134+ print (f"\n Running benchmarks on { Path (video_path ).name } " )
137135
138136 print ("Creating a VideoDecoder object with custom_frame_mappings:" )
139137 with open (json_path , "r" ) as f :
140138 bench (VideoDecoder , file_like = True , source = video_path , stream_index = stream_index , custom_frame_mappings = f )
141139
142- # Compare against seek_modes
140+ # Compare against exact seek_mode
143141 print ("Creating a VideoDecoder object with seek_mode='exact':" )
144142 bench (VideoDecoder , source = video_path , stream_index = stream_index , seek_mode = "exact" )
145143
146144# %%
147145# Performance: Frame decoding with custom frame mappings
148146# ------------------------------------------------------
149147#
150- # Although the custom_frame_mappings parameter only affects the performance of
151- # the :class:`~torchcodec.decoders.VideoDecoder` creation , decoding workflows
152- # typically involve creating a :class:`~torchcodec.decoders.VideoDecoder` instance.
153- # As a result, the performance benefits of custom_frame_mappings can be seen .
148+ # Although using custom_frame_mappings only impacts the initialization speed of
149+ # :class:`~torchcodec.decoders.VideoDecoder`, decoding workflows
150+ # usually involve creating a :class:`~torchcodec.decoders.VideoDecoder` instance,
151+ # so the creation speedup carries over to the overall decoding workflow.
154152
155153
156154def decode_frames (video_path , seek_mode = "exact" , custom_frame_mappings = None ):
@@ -163,8 +161,8 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
163161
164162
165163for video_path , json_path in ((short_video_path , short_json_path ), (long_video_path , long_json_path )):
166- print (f"Running benchmarks on { Path (video_path ).name } " )
167- print ("Decoding frames with custom_frame_mappings JSON str from file :" )
164+ print (f"\n Running benchmarks on { Path (video_path ).name } " )
165+ print ("Decoding frames with custom_frame_mappings:" )
168166 with open (json_path , "r" ) as f :
169167 bench (decode_frames , file_like = True , video_path = video_path , custom_frame_mappings = f )
170168
@@ -175,9 +173,9 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
175173# Accuracy: Metadata and frame retrieval
176174# --------------------------------------
177175#
178- # We've seen that using custom frame mappings can significantly speed up
179- # the :class:`~torchcodec.decoders.VideoDecoder` creation. The advantage is that
180- # seeking is still as accurate as with ``seek_mode="exact"``.
176+ # In addition to faster instantiation than ``seek_mode="exact"``, custom frame mappings
177+ # also retain exact metadata and accurate frame seeking.
178+ #
181179
182180print ("Metadata of short video with custom_frame_mappings:" )
183181with open (short_json_path , "r" ) as f :
@@ -202,9 +200,8 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
202200#
203201# Custom frame mappings contain the same frame index information
204202# that would normally be computed during the :term:`scan` operation in exact mode.
205- # (frame presentation timestamps (PTS), durations, and keyframe indicators)
206203# By providing this information to the :class:`~torchcodec.decoders.VideoDecoder`
207- # as a JSON, it eliminates the need for the expensive scan while preserving all the
204+ # as a JSON, it eliminates the need for the expensive scan while preserving the
208205# accuracy benefits.
209206#
210207# Which mode should I use?
0 commit comments