Skip to content

Commit dbd6610

Browse files
author
Daniel Flores
committed
add descriptive comments
1 parent 738501a commit dbd6610

File tree

1 file changed

+86
-46
lines changed

1 file changed

+86
-46
lines changed

examples/decoding/custom_frame_mappings.py

Lines changed: 86 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -11,49 +11,59 @@
1111
1212
In this example, we will describe the ``custom_frame_mappings`` parameter of the
1313
:class:`~torchcodec.decoders.VideoDecoder` class.
14+
15+
This parameter allows you to provide pre-computed frame mapping information to
16+
speed up :class:`~torchcodec.decoders.VideoDecoder` instantiation, while
17+
maintaining the frame seeking accuracy of ``seek_mode="exact"``.
18+
19+
This makes it ideal for workflows where:
20+
1. accuracy is critical, so ``seek_mode="approximate"`` cannot be used
21+
2. the videos can be preprocessed once and then decoded many times.
1422
"""
1523

1624
# %%
17-
# Create an HD video using ffmpeg and use the ffmpeg CLI to repeat it 10 times
18-
# to get two videos: a short video of approximately 30 seconds and a long one of about 10 mins.
25+
# First, let's set up our test videos: we'll download a short video and
26+
# use ffmpeg to create a longer version by repeating it multiple times.
1927

2028
import tempfile
2129
from pathlib import Path
2230
import subprocess
23-
from torchcodec.decoders import VideoDecoder
31+
import requests
32+
33+
url = "https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4.mp4"
34+
response = requests.get(url, headers={"User-Agent": ""})
35+
if response.status_code != 200:
36+
raise RuntimeError(f"Failed to download video. {response.status_code = }.")
2437

2538
temp_dir = tempfile.mkdtemp()
2639
short_video_path = Path(temp_dir) / "short_video.mp4"
27-
28-
ffmpeg_generate_video_command = [
29-
"ffmpeg",
30-
"-y",
31-
"-f", "lavfi",
32-
"-i", "mandelbrot=s=1920x1080",
33-
"-t", "30",
34-
"-c:v", "h264",
35-
"-r", "60",
36-
"-g", "600",
37-
"-pix_fmt", "yuv420p",
38-
f"{short_video_path}"
39-
]
40-
subprocess.run(ffmpeg_generate_video_command)
40+
with open(short_video_path, 'wb') as f:
41+
for chunk in response.iter_content():
42+
f.write(chunk)
4143

4244
long_video_path = Path(temp_dir) / "long_video.mp4"
4345
ffmpeg_command = [
4446
"ffmpeg",
45-
"-stream_loop", "20", # repeat video 20 times to get a 10 min video
47+
"-stream_loop", "3", # repeat video 3 times to get a ~13 min video
4648
"-i", f"{short_video_path}",
4749
"-c", "copy",
4850
f"{long_video_path}"
4951
]
50-
subprocess.run(ffmpeg_command)
52+
subprocess.run(ffmpeg_command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
5153

54+
from torchcodec.decoders import VideoDecoder
5255
print(f"Short video duration: {VideoDecoder(short_video_path).metadata.duration_seconds} seconds")
5356
print(f"Long video duration: {VideoDecoder(long_video_path).metadata.duration_seconds / 60} minutes")
5457

5558
# %%
56-
# Preprocessing step to create frame mappings for the videos using ffprobe.
59+
# .. _frame_mappings_creation:
60+
#
61+
# Creating custom frame mappings with ffprobe
62+
# --------------------------------------------
63+
#
64+
# The key to using custom frame mappings is preprocessing your videos to extract
65+
# frame timing information and whether or not each frame is a keyframe.
66+
# We use ffprobe to generate JSON files containing this metadata.
5767

5868
from pathlib import Path
5969
import subprocess
@@ -62,7 +72,6 @@
6272

6373
stream_index = 0
6474

65-
temp_dir = tempfile.mkdtemp()
6675
long_json_path = Path(temp_dir) / "long_custom_frame_mappings.json"
6776
short_json_path = Path(temp_dir) / "short_custom_frame_mappings.json"
6877

@@ -79,8 +88,14 @@
7988
print(f"Wrote {len(ffprobe_result.stdout)} characters to {short_json_path}")
8089

8190
# %%
82-
# Define benchmarking function. When a file_like object is passed in, its necessary to seek
83-
# to the beginning of the file before reading it in the next iteration.
91+
# .. _perf_creation:
92+
#
93+
# Performance: ``VideoDecoder`` creation with custom frame mappings
94+
# ------------------------------------------------------------------
95+
#
96+
# Let's define a benchmarking function to measure performance. Note that when using
97+
# file-like objects for custom_frame_mappings, we need to seek back to the beginning
98+
# between iterations since the JSON data is consumed during VideoDecoder creation.
8499

85100
import torch
86101

@@ -106,16 +121,15 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
106121
print(f"{med = :.2f}ms +- {std:.2f}")
107122

108123
# %%
109-
# Compare performance of initializing VideoDecoder with custom_frame_mappings vs exact seek_mode
124+
# Now let's compare the performance of creating VideoDecoder objects with custom
125+
# frame mappings versus the exact seek mode. You'll see that custom
126+
# frame mappings provide significant speedups, especially for longer videos.
110127

111128

112129
for video_path, json_path in ((short_video_path, short_json_path), (long_video_path, long_json_path)):
113130
print(f"Running benchmarks on {Path(video_path).name}")
114-
print("Creating a VideoDecoder object with custom_frame_mappings JSON str from file:")
115-
with open(json_path, "r") as f:
116-
bench(VideoDecoder, source=video_path, stream_index=stream_index, custom_frame_mappings=(f.read()))
117131

118-
print("Creating a VideoDecoder object with custom_frame_mappings from filelike:")
132+
print("Creating a VideoDecoder object with custom_frame_mappings:")
119133
with open(json_path, "r") as f:
120134
bench(VideoDecoder, file_like=True, source=video_path, stream_index=stream_index, custom_frame_mappings=f)
121135

@@ -124,7 +138,12 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
124138
bench(VideoDecoder, source=video_path, stream_index=stream_index, seek_mode="exact")
125139

126140
# %%
127-
# Decode frames with custom_frame_mappings vs exact seek_mode
141+
# Performance: Frame decoding with custom frame mappings
142+
# --------------------------------------------------------
143+
#
144+
# The performance benefits extend to frame decoding operations as well, since
145+
# each decoding workflow typically involves creating a VideoDecoder instance.
146+
# Let's compare frame decoding performance between the two approaches.
128147

129148

130149
def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None):
@@ -140,38 +159,59 @@ def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None)
140159
print(f"Running benchmarks on {Path(video_path).name}")
141160
print("Decoding frames with custom_frame_mappings JSON str from file:")
142161
with open(json_path, "r") as f:
143-
bench(decode_frames, video_path=video_path, custom_frame_mappings=(f.read()))
162+
bench(decode_frames, file_like=True, video_path=video_path, custom_frame_mappings=f)
144163

145164
print("Decoding frames with seek_mode='exact':")
146165
bench(decode_frames, video_path=video_path, seek_mode="exact")
147166

148167
# %%
149-
# Compare frame accuracy with custom_frame_mappings vs exact seek_mode
150-
video_path = short_video_path
151-
json_path = short_json_path
168+
# Accuracy: High accuracy frame seeking with custom frame mappings
169+
# ----------------------------------------------------------------
170+
#
171+
# The main advantage of using custom frame mappings over approximate mode is that
172+
# frame seeking accuracy is as high as exact mode.
173+
174+
video_path = long_video_path
175+
json_path = long_json_path
152176
with open(json_path, "r") as f:
153-
custom_frame_mappings = f.read()
154177
custom_frame_mappings_decoder = VideoDecoder(
155178
source=video_path,
156-
custom_frame_mappings=custom_frame_mappings
179+
custom_frame_mappings=f,
180+
stream_index=0
157181
)
158182

159-
exact_decoder = VideoDecoder(short_video_path, seek_mode="exact")
160-
approx_decoder = VideoDecoder(short_video_path, seek_mode="approximate")
183+
exact_decoder = VideoDecoder(video_path, seek_mode="exact", stream_index=0)
184+
approx_decoder = VideoDecoder(video_path, seek_mode="approximate", stream_index=0)
161185

162-
print("Metadata of short video with custom_frame_mappings:")
163-
print(custom_frame_mappings_decoder.metadata)
164-
print("Metadata of short video with seek_mode='exact':")
165-
print(exact_decoder.metadata)
166-
print("Metadata of short video with seek_mode='approximate':")
167-
print(approx_decoder.metadata)
168-
169-
for i in range(len(approx_decoder)):
186+
print("Comparing frames between exact seek mode decoder and custom_frame_mappings decoder:")
187+
for i in range(len(exact_decoder)):
170188
torch.testing.assert_close(
171-
approx_decoder.get_frame_at(i).data,
189+
exact_decoder.get_frame_at(i).data,
172190
custom_frame_mappings_decoder.get_frame_at(i).data,
173191
atol=0, rtol=0,
174192
)
175193
print("Frame seeking is the same for this video!")
176194

177195
# %%
196+
# How do custom_frame_mappings help?
197+
# ----------------------------------
198+
#
199+
# Custom frame mappings contain the same frame index information
200+
# that would normally be computed during the :term:`scan` operation in exact mode
201+
# (frame presentation timestamps (PTS), durations, and keyframe indicators).
202+
# By providing this information to the :class:`~torchcodec.decoders.VideoDecoder`
203+
# as a JSON, it eliminates the need for the expensive scan while preserving all the
204+
# accuracy benefits.
205+
#
206+
# Which approach should I use?
207+
# -----------------------------
208+
#
209+
# - For fastest decoding, "approximate" mode is strongly recommended.
210+
#
211+
# - For exact frame seeking, custom frame mappings will benefit workflows where the
212+
# same videos are decoded repeatedly, and some preprocessing work can be done.
213+
#
214+
# - For exact frame seeking without preprocessing, use "exact" mode.
215+
#
216+
217+
# %%

0 commit comments

Comments
 (0)