Skip to content

Commit 039ad71

Browse files
author
pytorchbot
committed
2025-11-14 nightly release (0535b00)
1 parent 9d519da commit 039ad71

File tree

17 files changed

+732
-378
lines changed

17 files changed

+732
-378
lines changed

.github/workflows/lint.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
run: python -m pip install --upgrade pip
6363
- name: Install dependencies and FFmpeg
6464
run: |
65-
python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
65+
python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
6666
conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge
6767
ffmpeg -version
6868
- name: Build and install torchcodec

docs/source/api_ref_transforms.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
.. _transforms:
2+
3+
=====================
4+
torchcodec.transforms
5+
=====================
6+
7+
.. currentmodule:: torchcodec.transforms
8+
9+
For a tutorial, see: TODO_DECODER_TRANSFORMS_TUTORIAL.
10+
11+
.. autosummary::
12+
:toctree: generated/
13+
:nosignatures:
14+
:template: dataclass.rst
15+
16+
DecoderTransform
17+
Resize

docs/source/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ def __call__(self, filename):
209209
intersphinx_mapping = {
210210
"python": ("https://docs.python.org/3/", None),
211211
"torch": ("https://pytorch.org/docs/stable/", None),
212+
"torchvision": ("https://docs.pytorch.org/vision/stable/", None),
212213
"numpy": ("https://numpy.org/doc/stable/", None),
213214
"PIL": ("https://pillow.readthedocs.io/en/stable/", None),
214215
"matplotlib": ("https://matplotlib.org/stable/", None),

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,4 @@ Encoding
125125
api_ref_decoders
126126
api_ref_encoders
127127
api_ref_samplers
128+
api_ref_transforms

mypy.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ files = src/torchcodec
44
show_error_codes = True
55
pretty = True
66
allow_redefinition = True
7+
follow_untyped_imports = True

src/torchcodec/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# Note: usort wants to put Frame and FrameBatch after decoders and samplers,
1010
# but that results in circular import.
1111
from ._frame import AudioSamples, Frame, FrameBatch # usort:skip # noqa
12-
from . import decoders, encoders, samplers # noqa
12+
from . import decoders, encoders, samplers, transforms # noqa
1313

1414
try:
1515
# Note that version.py is generated during install.

src/torchcodec/_core/Encoder.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "torch/types.h"
66

77
extern "C" {
8+
#include <libavutil/opt.h>
89
#include <libavutil/pixdesc.h>
910
}
1011

@@ -568,6 +569,43 @@ AVPixelFormat validatePixelFormat(
568569
}
569570
TORCH_CHECK(false, errorMsg.str());
570571
}
572+
573+
// Checks that `value` lies within the [min, max] range that the codec's
// private AVOption named `optionName` declares, and fails via TORCH_CHECK
// with a descriptive message if it does not.
// Returns without checking when the codec has no priv_class, when the option
// cannot be found, or when the option's type is not one of INT, INT64, FLOAT
// or DOUBLE.
void validateDoubleOption(
574+
const AVCodec& avCodec,
575+
const char* optionName,
576+
double value) {
577+
if (!avCodec.priv_class) {
578+
return;
579+
}
580+
const AVOption* option = av_opt_find2(
581+
// Convert obj arg from const AVClass* const* to non-const void*.
582+
// First static_cast to const void*, then const_cast away the const.
583+
const_cast<void*>(static_cast<const void*>(&avCodec.priv_class)),
584+
optionName,
585+
nullptr,
586+
0,
587+
AV_OPT_SEARCH_FAKE_OBJ,
588+
nullptr);
589+
// If the option was not found, let FFmpeg handle it later
590+
if (!option) {
591+
return;
592+
}
593+
if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 ||
594+
option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) {
595+
TORCH_CHECK(
596+
value >= option->min && value <= option->max,
597+
optionName,
598+
"=",
599+
value,
600+
" is out of valid range [",
601+
option->min,
602+
", ",
603+
option->max,
604+
"] for this codec. For more details, run 'ffmpeg -h encoder=",
605+
avCodec.name,
606+
"'");
607+
}
608+
}
571609
} // namespace
572610

573611
VideoEncoder::~VideoEncoder() {
@@ -700,12 +738,17 @@ void VideoEncoder::initializeEncoder(
700738
// Apply videoStreamOptions
701739
AVDictionary* options = nullptr;
702740
if (videoStreamOptions.crf.has_value()) {
741+
validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value());
703742
av_dict_set(
704743
&options,
705744
"crf",
706745
std::to_string(videoStreamOptions.crf.value()).c_str(),
707746
0);
708747
}
748+
if (videoStreamOptions.preset.has_value()) {
749+
av_dict_set(
750+
&options, "preset", videoStreamOptions.preset.value().c_str(), 0);
751+
}
709752
int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
710753
av_dict_free(&options);
711754

src/torchcodec/_core/StreamOptions.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,11 @@ struct VideoStreamOptions {
4545
std::string_view deviceVariant = "ffmpeg";
4646

4747
// Encoding options
48-
// TODO-VideoEncoder: Consider adding other optional fields here
49-
// (bit rate, gop size, max b frames, preset)
50-
std::optional<int> crf;
51-
5248
// Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p")
5349
// If not specified, uses codec's default format.
5450
std::optional<std::string> pixelFormat;
51+
std::optional<double> crf;
52+
std::optional<std::string> preset;
5553
};
5654

5755
struct AudioStreamOptions {

src/torchcodec/_core/custom_ops.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
3737
m.def(
3838
"_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
3939
m.def(
40-
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, int? crf=None) -> ()");
40+
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
4141
m.def(
42-
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, int? crf=None) -> Tensor");
42+
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None) -> Tensor");
4343
m.def(
44-
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, int? crf=None) -> ()");
44+
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
4545
m.def(
4646
"create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
4747
m.def(
@@ -603,11 +603,13 @@ void encode_video_to_file(
603603
const at::Tensor& frames,
604604
int64_t frame_rate,
605605
std::string_view file_name,
606-
std::optional<std::string> pixel_format = std::nullopt,
607-
std::optional<int64_t> crf = std::nullopt) {
606+
std::optional<std::string_view> pixel_format = std::nullopt,
607+
std::optional<double> crf = std::nullopt,
608+
std::optional<std::string_view> preset = std::nullopt) {
608609
VideoStreamOptions videoStreamOptions;
609610
videoStreamOptions.pixelFormat = pixel_format;
610611
videoStreamOptions.crf = crf;
612+
videoStreamOptions.preset = preset;
611613
VideoEncoder(
612614
frames,
613615
validateInt64ToInt(frame_rate, "frame_rate"),
@@ -620,12 +622,14 @@ at::Tensor encode_video_to_tensor(
620622
const at::Tensor& frames,
621623
int64_t frame_rate,
622624
std::string_view format,
623-
std::optional<std::string> pixel_format = std::nullopt,
624-
std::optional<int64_t> crf = std::nullopt) {
625+
std::optional<std::string_view> pixel_format = std::nullopt,
626+
std::optional<double> crf = std::nullopt,
627+
std::optional<std::string_view> preset = std::nullopt) {
625628
auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
626629
VideoStreamOptions videoStreamOptions;
627630
videoStreamOptions.pixelFormat = pixel_format;
628631
videoStreamOptions.crf = crf;
632+
videoStreamOptions.preset = preset;
629633
return VideoEncoder(
630634
frames,
631635
validateInt64ToInt(frame_rate, "frame_rate"),
@@ -640,8 +644,9 @@ void _encode_video_to_file_like(
640644
int64_t frame_rate,
641645
std::string_view format,
642646
int64_t file_like_context,
643-
std::optional<std::string> pixel_format = std::nullopt,
644-
std::optional<int64_t> crf = std::nullopt) {
647+
std::optional<std::string_view> pixel_format = std::nullopt,
648+
std::optional<double> crf = std::nullopt,
649+
std::optional<std::string_view> preset = std::nullopt) {
645650
auto fileLikeContext =
646651
reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
647652
TORCH_CHECK(
@@ -651,6 +656,7 @@ void _encode_video_to_file_like(
651656
VideoStreamOptions videoStreamOptions;
652657
videoStreamOptions.pixelFormat = pixel_format;
653658
videoStreamOptions.crf = crf;
659+
videoStreamOptions.preset = preset;
654660

655661
VideoEncoder encoder(
656662
frames,

src/torchcodec/_core/ops.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,9 @@ def encode_video_to_file_like(
213213
frame_rate: int,
214214
format: str,
215215
file_like: Union[io.RawIOBase, io.BufferedIOBase],
216-
crf: Optional[int] = None,
216+
crf: Optional[Union[int, float]] = None,
217217
pixel_format: Optional[str] = None,
218+
preset: Optional[str] = None,
218219
) -> None:
219220
"""Encode video frames to a file-like object.
220221
@@ -225,6 +226,7 @@ def encode_video_to_file_like(
225226
file_like: File-like object that supports write() and seek() methods
226227
crf: Optional constant rate factor for encoding quality
227228
pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p")
229+
preset: Optional encoder preset as string (e.g., "ultrafast", "medium")
228230
"""
229231
assert _pybind_ops is not None
230232

@@ -235,6 +237,7 @@ def encode_video_to_file_like(
235237
_pybind_ops.create_file_like_context(file_like, True), # True means for writing
236238
pixel_format,
237239
crf,
240+
preset,
238241
)
239242

240243

@@ -322,8 +325,9 @@ def encode_video_to_file_abstract(
322325
frames: torch.Tensor,
323326
frame_rate: int,
324327
filename: str,
325-
crf: Optional[int] = None,
326328
pixel_format: Optional[str] = None,
329+
crf: Optional[Union[int, float]] = None,
330+
preset: Optional[str] = None,
327331
) -> None:
328332
return
329333

@@ -333,8 +337,9 @@ def encode_video_to_tensor_abstract(
333337
frames: torch.Tensor,
334338
frame_rate: int,
335339
format: str,
336-
crf: Optional[int] = None,
337340
pixel_format: Optional[str] = None,
341+
crf: Optional[Union[int, float]] = None,
342+
preset: Optional[str] = None,
338343
) -> torch.Tensor:
339344
return torch.empty([], dtype=torch.long)
340345

@@ -345,8 +350,9 @@ def _encode_video_to_file_like_abstract(
345350
frame_rate: int,
346351
format: str,
347352
file_like_context: int,
348-
crf: Optional[int] = None,
349353
pixel_format: Optional[str] = None,
354+
crf: Optional[Union[int, float]] = None,
355+
preset: Optional[str] = None,
350356
) -> None:
351357
return
352358

0 commit comments

Comments
 (0)