Skip to content

Commit d5be152

Browse files
committed
add suggestions, link in docstrings
1 parent ba3cbbf commit d5be152

File tree

4 files changed

+65
-16
lines changed

4 files changed

+65
-16
lines changed

docs/source/api_ref_encoders.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ For an audio decoder tutorial, see: :ref:`sphx_glr_generated_examples_encoding_a
1616
:template: class.rst
1717

1818
AudioEncoder
19+
VideoEncoder

docs/source/index.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,14 @@ Encoding
9898

9999
How to encode audio samples
100100

101+
.. grid-item-card:: :octicon:`file-code;1em`
102+
Video Encoding
103+
:img-top: _static/img/card-background.svg
104+
:link: generated_examples/encoding/video_encoding.html
105+
:link-type: url
106+
107+
How to encode video frames
108+
101109
.. toctree::
102110
:maxdepth: 1
103111
:caption: TorchCodec documentation

examples/encoding/video_encoding.py

Lines changed: 41 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
# %%
1717
# First, we'll download a video and decode some frames to tensors.
18-
# These will be the input to the VideoEncoder. For more details on decoding,
18+
# These will be the input to the :class:`~torchcodec.encoders.VideoEncoder`. For more details on decoding,
1919
# see :ref:`sphx_glr_generated_examples_decoding_basic_example.py`.
2020
# Otherwise, skip ahead to :ref:`creating_encoder`.
2121

@@ -35,7 +35,7 @@ def play_video(encoded_bytes):
3535

3636

3737
# Video source: https://www.pexels.com/video/adorable-cats-on-the-lawn-4977395/
38-
# License: CC0. Author: Altaf Shah.
38+
# Author: Altaf Shah.
3939
url = "https://videos.pexels.com/video-files/4977395/4977395-hd_1920_1080_24fps.mp4"
4040

4141
response = requests.get(url, headers={"User-Agent": ""})
@@ -45,7 +45,7 @@ def play_video(encoded_bytes):
4545
raw_video_bytes = response.content
4646

4747
decoder = VideoDecoder(raw_video_bytes)
48-
frames = decoder[:60] # Get first 60 frames
48+
frames = decoder.get_frames_in_range(0, 60).data # Get first 60 frames
4949
# TODO: use float once other PR lands
5050
frame_rate = int(decoder.metadata.average_fps)
5151

@@ -78,7 +78,7 @@ def play_video(encoded_bytes):
7878
#
7979
# :class:`~torchcodec.encoders.VideoEncoder` supports encoding frames into a
8080
# file via the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method, to
81-
# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_filelike`
81+
# file-like objects via the :meth:`~torchcodec.encoders.VideoEncoder.to_file_like`
8282
# method, or to raw bytes via :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`.
8383
# For now we will use :meth:`~torchcodec.encoders.VideoEncoder.to_tensor`, so we
8484
# can easily inspect and display the encoded video.
@@ -92,15 +92,25 @@ def play_video(encoded_bytes):
9292
# round-trip encode/decode process works as expected:
9393

9494
decoder_verify = VideoDecoder(encoded_frames)
95-
decoded_frames = decoder_verify[:]
95+
decoded_frames = decoder_verify.get_frames_in_range(0, 60).data
9696

9797
print(f"Re-decoded video: {decoded_frames.shape = }")
9898
print(f"Original frames: {frames.shape = }")
9999

100100
# %%
101+
# .. _codec_selection:
102+
#
101103
# Codec Selection
102104
# ---------------
103105
#
106+
# By default, the codec used is selected automatically using the file extension provided
107+
# in the ``dest`` parameter for the :meth:`~torchcodec.encoders.VideoEncoder.to_file` method,
108+
# or using the ``format`` parameter for the
109+
# :meth:`~torchcodec.encoders.VideoEncoder.to_file_like` and
110+
# :meth:`~torchcodec.encoders.VideoEncoder.to_tensor` methods.
111+
#
112+
# - For example, when encoding to MP4 format, the default codec used is ``H.264``.
113+
#
104114
# The ``codec`` parameter specifies which video codec to use for encoding.
105115
# You can specify either a specific codec implementation (e.g., ``"libx264"``)
106116
# or a codec specification (e.g., ``"h264"``). Different codecs offer
@@ -112,18 +122,21 @@ def play_video(encoded_bytes):
112122
#
113123
# Let's encode the same frames using different codecs:
114124

125+
import tempfile
126+
from pathlib import Path
127+
115128
# H.264 encoding
116-
h264_output = "libx264_encoded.mp4"
129+
h264_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
117130
encoder.to_file(h264_output, codec="libx264")
118131

119132
# H.265 encoding
120-
hevc_output = "hevc_encoded.mp4"
133+
hevc_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
121134
encoder.to_file(hevc_output, codec="hevc")
122135

123136
# Now let's use ffprobe to verify the codec used in the output files
124137
import subprocess
125138

126-
for output in [h264_output, hevc_output]:
139+
for output, name in [(h264_output, "h264_output"), (hevc_output, "hevc_output")]:
127140
result = subprocess.run(
128141
[
129142
"ffprobe",
@@ -140,9 +153,16 @@ def play_video(encoded_bytes):
140153
capture_output=True,
141154
text=True,
142155
)
143-
print(f"Codec used in {output}: {result.stdout.strip()}")
156+
print(f"Codec used in {name}: {result.stdout.strip()}")
157+
158+
# %%
159+
# In most cases, you can simply specify the ``format`` parameter and let FFmpeg select the default codec.
160+
# However, specifying the ``codec`` parameter is useful to select a particular codec implementation
161+
# (``libx264`` vs ``libx265``) or to have more control over the encoding behavior.
144162

145163
# %%
164+
# .. _pixel_format:
165+
#
146166
# Pixel Format
147167
# ------------
148168
#
@@ -169,6 +189,8 @@ def play_video(encoded_bytes):
169189
play_video(yuv420_encoded_frames)
170190

171191
# %%
192+
# .. _crf:
193+
#
172194
# CRF (Constant Rate Factor)
173195
# --------------------------
174196
#
@@ -197,6 +219,8 @@ def play_video(encoded_bytes):
197219

198220

199221
# %%
222+
# .. _preset:
223+
#
200224
# Preset
201225
# ------
202226
#
@@ -207,25 +231,27 @@ def play_video(encoded_bytes):
207231
# For example, with the commonly used H.264 codec, ``libx264`` presets include:
208232
#
209233
# - ``"ultrafast"`` (fastest), ``"fast"``, ``"medium"`` (default), ``"slow"``, ``"veryslow"`` (slowest, best compression).
234+
# - See additional details in the `H.264 Video Encoding Guide <https://trac.ffmpeg.org/wiki/Encode/H.264#a2.Chooseapresetandtune>`_.
210235
#
211236
# .. note::
212237
#
213238
# Not all codecs support the ``preset`` option. Use ``ffmpeg -h encoder=<codec_name>``
214239
# to check available options for your selected codec.
215240
#
216241

217-
import os
218242
# Fast encoding with a larger file size
219-
fast_output = "fast_encoded.mp4"
243+
fast_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
220244
encoder.to_file(fast_output, codec="libx264", preset="ultrafast")
221-
print(f"Size of fast encoded file: {os.path.getsize(fast_output)} bytes")
245+
print(f"Size of fast encoded file: {Path(fast_output).stat().st_size} bytes")
222246

223247
# Slow encoding for a smaller file size
224-
slow_output = "slow_encoded.mp4"
248+
slow_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
225249
encoder.to_file(slow_output, codec="libx264", preset="veryslow")
226-
print(f"Size of slow encoded file: {os.path.getsize(slow_output)} bytes")
250+
print(f"Size of slow encoded file: {Path(slow_output).stat().st_size} bytes")
227251

228252
# %%
253+
# .. _extra_options:
254+
#
229255
# Extra Options
230256
# -------------
231257
#
@@ -234,7 +260,6 @@ def play_video(encoded_bytes):
234260
# control of encoding settings beyond the common parameters.
235261
#
236262
# For example, some potential extra options for the commonly used H.264 codec, ``libx264`` include:
237-
# For example, with , ``libx264``:
238263
#
239264
# - ``"g"`` - GOP (Group of Pictures) size / keyframe interval
240265
# - ``"max_b_frames"`` - Maximum number of B-frames between I and P frames
@@ -248,7 +273,7 @@ def play_video(encoded_bytes):
248273

249274

250275
# Custom GOP size and tuning
251-
custom_output = "custom_encoded.mp4"
276+
custom_output = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
252277
encoder.to_file(
253278
custom_output,
254279
codec="libx264",

src/torchcodec/encoders/_video_encoder.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,18 +51,23 @@ def to_file(
5151
codec (str, optional): The codec to use for encoding (e.g., "libx264",
5252
"h264"). If not specified, the default codec
5353
for the container format will be used.
54+
See :ref:`codec_selection` for details.
5455
pixel_format (str, optional): The pixel format for encoding (e.g.,
5556
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
57+
See :ref:`pixel_format` for details.
5658
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
5759
mean better quality. Valid range depends on the encoder (commonly 0-51).
5860
Defaults to None (which will use encoder's default).
61+
See :ref:`crf` for details.
5962
preset (str or int, optional): Encoder option that controls the tradeoff between
6063
encoding speed and compression. Valid values depend on the encoder (commonly
6164
a string: "fast", "medium", "slow"). Defaults to None
6265
(which will use encoder's default).
66+
See :ref:`preset` for details.
6367
extra_options (dict[str, Any], optional): A dictionary of additional
6468
encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
6569
Values will be converted to strings before passing to the encoder.
70+
See :ref:`extra_options` for details.
6671
"""
6772
preset = str(preset) if isinstance(preset, int) else preset
6873
_core.encode_video_to_file(
@@ -96,18 +101,23 @@ def to_tensor(
96101
codec (str, optional): The codec to use for encoding (e.g., "libx264",
97102
"h264"). If not specified, the default codec
98103
for the container format will be used.
104+
See :ref:`codec_selection` for details.
99105
pixel_format (str, optional): The pixel format to encode frames into (e.g.,
100106
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
107+
See :ref:`pixel_format` for details.
101108
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
102109
mean better quality. Valid range depends on the encoder (commonly 0-51).
103110
Defaults to None (which will use encoder's default).
111+
See :ref:`crf` for details.
104112
preset (str or int, optional): Encoder option that controls the tradeoff between
105113
encoding speed and compression. Valid values depend on the encoder (commonly
106114
a string: "fast", "medium", "slow"). Defaults to None
107115
(which will use encoder's default).
116+
See :ref:`preset` for details.
108117
extra_options (dict[str, Any], optional): A dictionary of additional
109118
encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
110119
Values will be converted to strings before passing to the encoder.
120+
See :ref:`extra_options` for details.
111121
112122
Returns:
113123
Tensor: The raw encoded bytes as 1D uint8 Tensor.
@@ -150,18 +160,23 @@ def to_file_like(
150160
codec (str, optional): The codec to use for encoding (e.g., "libx264",
151161
"h264"). If not specified, the default codec
152162
for the container format will be used.
163+
See :ref:`codec_selection` for details.
153164
pixel_format (str, optional): The pixel format for encoding (e.g.,
154165
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
166+
See :ref:`pixel_format` for details.
155167
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
156168
mean better quality. Valid range depends on the encoder (commonly 0-51).
157169
Defaults to None (which will use encoder's default).
170+
See :ref:`crf` for details.
158171
preset (str or int, optional): Encoder option that controls the tradeoff between
159172
encoding speed and compression. Valid values depend on the encoder (commonly
160173
a string: "fast", "medium", "slow"). Defaults to None
161174
(which will use encoder's default).
175+
See :ref:`preset` for details.
162176
extra_options (dict[str, Any], optional): A dictionary of additional
163177
encoder options to pass, e.g. ``{"qp": 5, "tune": "film"}``.
164178
Values will be converted to strings before passing to the encoder.
179+
See :ref:`extra_options` for details.
165180
"""
166181
preset = str(preset) if isinstance(preset, int) else preset
167182
_core.encode_video_to_file_like(

0 commit comments

Comments
 (0)