Skip to content

Commit ba3cbbf

Browse files
committed
add tutorial w videos
1 parent 17164fd commit ba3cbbf

File tree

2 files changed

+263
-0
lines changed

2 files changed

+263
-0
lines changed

docs/source/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ def __call__(self, filename):
8787
assert "examples/encoding" in self.src_dir
8888
order = [
8989
"audio_encoding.py",
90+
"video_encoding.py",
9091
]
9192

9293
try:
Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
"""
8+
=======================================
9+
Encoding video frames with VideoEncoder
10+
=======================================
11+
12+
In this example, we'll learn how to encode video frames to a file or to raw
13+
bytes using the :class:`~torchcodec.encoders.VideoEncoder` class.
14+
"""
15+
16+
# %%
17+
# First, we'll download a video and decode some frames to tensors.
18+
# These will be the input to the VideoEncoder. For more details on decoding,
19+
# see :ref:`sphx_glr_generated_examples_decoding_basic_example.py`.
20+
# Otherwise, skip ahead to :ref:`creating_encoder`.
21+
22+
import requests
23+
from torchcodec.decoders import VideoDecoder
24+
from IPython.display import Video
25+
26+
27+
def play_video(encoded_bytes):
    """Return an embedded MP4 player widget for an encoded video.

    ``encoded_bytes`` is the tensor holding the encoded video (in this
    tutorial, the value returned by ``VideoEncoder.to_tensor``). Its contents
    are converted to a Python ``bytes`` object and handed to
    ``IPython.display.Video`` so the clip is rendered inline.
    """
    raw_mp4 = encoded_bytes.numpy().tobytes()
    player_options = {
        "embed": True,
        "width": 640,
        "height": 360,
        "mimetype": "video/mp4",
    }
    return Video(data=raw_mp4, **player_options)
35+
36+
37+
# Video source: https://www.pexels.com/video/adorable-cats-on-the-lawn-4977395/
38+
# License: CC0. Author: Altaf Shah.
39+
url = "https://videos.pexels.com/video-files/4977395/4977395-hd_1920_1080_24fps.mp4"

# requests applies no timeout by default; set one explicitly so the docs build
# cannot hang forever if the host accepts the connection but stops responding.
response = requests.get(url, headers={"User-Agent": ""}, timeout=60)
if response.status_code != 200:
    raise RuntimeError(f"Failed to download video. {response.status_code = }.")

raw_video_bytes = response.content

decoder = VideoDecoder(raw_video_bytes)
frames = decoder[:60]  # Get first 60 frames
# TODO: use float once other PR lands
# Until then, round to the nearest integer instead of truncating, so that a
# fractional rate such as 29.97 fps becomes 30 rather than 29.
frame_rate = round(decoder.metadata.average_fps)
51+
52+
# %%
53+
# .. _creating_encoder:
54+
#
55+
# Creating an encoder
56+
# -------------------
57+
#
58+
# Let's instantiate a :class:`~torchcodec.encoders.VideoEncoder`. We will need to provide
59+
# the frames to be encoded as a 4D tensor of shape
60+
# ``(num_frames, num_channels, height, width)`` with values in the ``[0, 255]``
61+
# range and ``torch.uint8`` dtype. We will also need to provide the frame rate of the input
62+
# video.
63+
#
64+
# .. note::
65+
#
66+
# The ``frame_rate`` parameter corresponds to the frame rate of the
67+
# *input* video. It will also be used for the frame rate of the *output* encoded video.
68+
from torchcodec.encoders import VideoEncoder
69+
70+
print(f"{frames.shape = }, {frames.dtype = }")
71+
print(f"{frame_rate = } fps")
72+
73+
encoder = VideoEncoder(frames=frames, frame_rate=frame_rate)
74+
75+
# %%
76+
# Encoding to file, bytes, or file-like
77+
# -------------------------------------
78+
#
79+
# :class:`~torchcodec.encoders.VideoEncoder` supports encoding frames into a
80+
# file via the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method, to
81+
# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_file_like`
82+
# method, or to raw bytes via :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`.
83+
# For now we will use :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`, so we
84+
# can easily inspect and display the encoded video.
85+
86+
encoded_frames = encoder.to_tensor(format="mp4")
87+
play_video(encoded_frames)
88+
89+
# %%
90+
#
91+
# Now that we have encoded data, we can decode it back to verify the
92+
# round-trip encode/decode process works as expected:
93+
94+
decoder_verify = VideoDecoder(encoded_frames)
95+
decoded_frames = decoder_verify[:]
96+
97+
print(f"Re-decoded video: {decoded_frames.shape = }")
98+
print(f"Original frames: {frames.shape = }")
99+
100+
# %%
101+
# Codec Selection
102+
# ---------------
103+
#
104+
# The ``codec`` parameter specifies which video codec to use for encoding.
105+
# You can specify either a specific codec implementation (e.g., ``"libx264"``)
106+
# or a codec specification (e.g., ``"h264"``). Different codecs offer
107+
# different tradeoffs between quality, file size, and encoding speed.
108+
#
109+
# .. note::
110+
#
111+
# To see available encoders on your system, run ``ffmpeg -encoders``.
112+
#
113+
# Let's encode the same frames using different codecs:
114+
115+
# H.264 encoding
116+
h264_output = "libx264_encoded.mp4"
117+
encoder.to_file(h264_output, codec="libx264")
118+
119+
# H.265 encoding
120+
hevc_output = "hevc_encoded.mp4"
121+
encoder.to_file(hevc_output, codec="hevc")
122+
123+
# Now let's use ffprobe to verify the codec used in the output files
124+
import subprocess
125+
126+
# Ask ffprobe for just the codec name of the first video stream in each file.
for output in [h264_output, hevc_output]:
    result = subprocess.run(
        [
            "ffprobe",
            "-v",
            "error",
            "-select_streams",
            "v:0",
            "-show_entries",
            "stream=codec_name",
            "-of",
            "default=noprint_wrappers=1:nokey=1",
            output,
        ],
        capture_output=True,
        text=True,
        # Raise CalledProcessError if ffprobe fails, rather than silently
        # printing an empty codec name.
        check=True,
    )
    print(f"Codec used in {output}: {result.stdout.strip()}")
144+
145+
# %%
146+
# Pixel Format
147+
# ------------
148+
#
149+
# The ``pixel_format`` parameter controls the color sampling (chroma subsampling)
150+
# of the output video. This affects both quality and file size.
151+
#
152+
# Common pixel formats:
153+
#
154+
# - ``"yuv420p"`` - 4:2:0 chroma subsampling (standard quality, smaller file size, widely compatible)
155+
# - ``"yuv444p"`` - 4:4:4 chroma subsampling (full chroma resolution, higher quality, larger file size)
156+
#
157+
# Most playback devices and platforms support ``yuv420p``, making it the most
158+
# common choice for video encoding.
159+
#
160+
# .. note::
161+
#
162+
# Pixel format support depends on the codec used. Use ``ffmpeg -h encoder=<codec_name>``
163+
# to check available options for your selected codec.
164+
165+
# Standard pixel format
166+
yuv420_encoded_frames = encoder.to_tensor(
167+
format="mp4", codec="libx264", pixel_format="yuv420p"
168+
)
169+
play_video(yuv420_encoded_frames)
170+
171+
# %%
172+
# CRF (Constant Rate Factor)
173+
# --------------------------
174+
#
175+
# The ``crf`` parameter controls video quality, where lower values produce higher quality output.
176+
#
177+
# For example, with the commonly used H.264 codec, ``libx264``:
178+
#
179+
# - Values range from 0 (lossless) to 51 (worst quality)
180+
# - Values 17 or 18 are considered visually lossless, and the default is 23.
181+
#
182+
# .. note::
183+
#
184+
# The range and interpretation of CRF values depend on the codec used, and
185+
# not all codecs support CRF. Use ``ffmpeg -h encoder=<codec_name>`` to
186+
# check available options for your selected codec.
187+
#
188+
189+
# High quality (low CRF)
190+
high_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=0)
191+
play_video(high_quality_output)
192+
193+
# %%
194+
# Low quality (high CRF)
195+
low_quality_output = encoder.to_tensor(format="mp4", codec="libx264", crf=50)
196+
play_video(low_quality_output)
197+
198+
199+
# %%
200+
# Preset
201+
# ------
202+
#
203+
# The ``preset`` parameter controls the tradeoff between encoding speed and file compression.
204+
# Faster presets encode faster but produce larger files, while slower
205+
# presets take more time to encode but result in better compression.
206+
#
207+
# For example, with the commonly used H.264 codec, ``libx264`` presets include:
208+
#
209+
# - ``"ultrafast"`` (fastest), ``"fast"``, ``"medium"`` (default), ``"slow"``, ``"veryslow"`` (slowest, best compression).
210+
#
211+
# .. note::
212+
#
213+
# Not all codecs support the ``preset`` option. Use ``ffmpeg -h encoder=<codec_name>``
214+
# to check available options for your selected codec.
215+
#
216+
217+
import os
218+
# Fast encoding with a larger file size
219+
fast_output = "fast_encoded.mp4"
220+
encoder.to_file(fast_output, codec="libx264", preset="ultrafast")
221+
print(f"Size of fast encoded file: {os.path.getsize(fast_output)} bytes")
222+
223+
# Slow encoding for a smaller file size
224+
slow_output = "slow_encoded.mp4"
225+
encoder.to_file(slow_output, codec="libx264", preset="veryslow")
226+
print(f"Size of slow encoded file: {os.path.getsize(slow_output)} bytes")
227+
228+
# %%
229+
# Extra Options
230+
# -------------
231+
#
232+
# The ``extra_options`` parameter accepts a dictionary of codec-specific options
233+
# that would normally be set via FFmpeg command-line arguments. This enables
234+
# control of encoding settings beyond the common parameters.
235+
#
236+
# For example, some potential extra options for the commonly used H.264 codec, ``libx264``, include:
237+
#
238+
#
239+
# - ``"g"`` - GOP (Group of Pictures) size / keyframe interval
240+
# - ``"max_b_frames"`` - Maximum number of B-frames between I and P frames
241+
# - ``"tune"`` - Tuning preset (e.g., ``"film"``, ``"animation"``, ``"grain"``)
242+
#
243+
# .. note::
244+
#
245+
# Use ``ffmpeg -h encoder=<codec_name>`` to see all available options for
246+
# a specific codec.
247+
#
248+
249+
250+
# Custom GOP size and tuning
251+
custom_output = "custom_encoded.mp4"
252+
encoder.to_file(
253+
custom_output,
254+
codec="libx264",
255+
extra_options={
256+
"g": 50, # Keyframe every 50 frames
257+
"max_b_frames": 0, # Disable B-frames for faster decoding
258+
"tune": "fastdecode", # Optimize for fast decoding
259+
}
260+
)
261+
262+
# %%

0 commit comments

Comments
 (0)