Skip to content

Commit 2979900

Browse files
authored
Merge branch 'meta-pytorch:main' into fallback-container-duration
2 parents 27718be + 17164fd commit 2979900

File tree

20 files changed

+810
-132
lines changed

20 files changed

+810
-132
lines changed

.github/workflows/docs.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ jobs:
8484
${CONDA_RUN} conda info
8585
${CONDA_RUN} nvidia-smi
8686
${CONDA_RUN} conda list
87+
echo LD_LIBRARY_PATH=$CONDA_PREFIX/lib:/usr/local/cuda/lib64/:${LD_LIBRARY_PATH} >> $GITHUB_ENV
8788
- name: Assert ffmpeg exists
8889
run: |
8990
${CONDA_RUN} ffmpeg -buildconf

.github/workflows/lint.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
run: python -m pip install --upgrade pip
6363
- name: Install dependencies and FFmpeg
6464
run: |
65-
python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
65+
python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
6666
conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge
6767
ffmpeg -version
6868
- name: Build and install torchcodec

.github/workflows/linux_cuda_wheel.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,12 +95,13 @@ jobs:
9595
# We install conda packages at the start because otherwise conda may have conflicts with dependencies.
9696
# Note: xorg-libxau was added to fix a problem with ffmpeg 4. We should consider removing it.
9797
default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }} conda-forge::xorg-libxau"
98-
- name: Check env
98+
- name: Check env, set LD_LIBRARY_PATH
9999
run: |
100100
${CONDA_RUN} env
101101
${CONDA_RUN} conda info
102102
${CONDA_RUN} nvidia-smi
103103
${CONDA_RUN} conda list
104+
echo LD_LIBRARY_PATH=$CONDA_PREFIX/lib:/usr/local/cuda/lib64/:${LD_LIBRARY_PATH} >> $GITHUB_ENV
104105
- name: Assert ffmpeg exists
105106
run: |
106107
${CONDA_RUN} ffmpeg -buildconf

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
[**Installation**](#installing-torchcodec) | [**Simple Example**](#using-torchcodec) | [**Detailed Example**](https://pytorch.org/torchcodec/stable/generated_examples/) | [**Documentation**](https://pytorch.org/torchcodec) | [**Contributing**](CONTRIBUTING.md) | [**License**](#license)
1+
[**Installation**](#installing-torchcodec) | [**Simple Example**](#using-torchcodec) | [**Detailed Example**](https://meta-pytorch.org/torchcodec/stable/generated_examples/) | [**Documentation**](https://meta-pytorch.org/torchcodec) | [**Contributing**](CONTRIBUTING.md) | [**License**](#license)
22

33
# TorchCodec
44

@@ -23,7 +23,7 @@ We achieve these capabilities through:
2323

2424
Here's a condensed summary of what you can do with TorchCodec. For more detailed
2525
examples, [check out our
26-
documentation](https://pytorch.org/torchcodec/stable/generated_examples/)!
26+
documentation](https://meta-pytorch.org/torchcodec/stable/generated_examples/)!
2727

2828
#### Decoding
2929

@@ -219,7 +219,7 @@ The bottom row is [promotional video from NASA](https://download.pytorch.org/tor
219219
that has a resolution of 960x540 at 29.7 fps and is 206 seconds long. Both videos were
220220
encoded with libx264 and yuv420p pixel format. All decoders, except for TorchVision, used FFmpeg 6.1.2. TorchVision used FFmpeg 4.2.2.
221221

222-
For TorchCodec, the "approx" label means that it was using [approximate mode](https://pytorch.org/torchcodec/stable/generated_examples/approximate_mode.html)
222+
For TorchCodec, the "approx" label means that it was using [approximate mode](https://meta-pytorch.org/torchcodec/stable/generated_examples/decoding/approximate_mode.html)
223223
for seeking.
224224

225225
## Contributing

docs/source/api_ref_transforms.rst

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
.. _transforms:
2+
3+
=====================
4+
torchcodec.transforms
5+
=====================
6+
7+
.. currentmodule:: torchcodec.transforms
8+
9+
For a tutorial, see: TODO_DECODER_TRANSFORMS_TUTORIAL.
10+
11+
.. autosummary::
12+
:toctree: generated/
13+
:nosignatures:
14+
:template: dataclass.rst
15+
16+
DecoderTransform
17+
Resize

docs/source/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ def __call__(self, filename):
209209
intersphinx_mapping = {
210210
"python": ("https://docs.python.org/3/", None),
211211
"torch": ("https://pytorch.org/docs/stable/", None),
212+
"torchvision": ("https://docs.pytorch.org/vision/stable/", None),
212213
"numpy": ("https://numpy.org/doc/stable/", None),
213214
"PIL": ("https://pillow.readthedocs.io/en/stable/", None),
214215
"matplotlib": ("https://matplotlib.org/stable/", None),

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,3 +125,4 @@ Encoding
125125
api_ref_decoders
126126
api_ref_encoders
127127
api_ref_samplers
128+
api_ref_transforms

mypy.ini

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ files = src/torchcodec
44
show_error_codes = True
55
pretty = True
66
allow_redefinition = True
7+
follow_untyped_imports = True

src/torchcodec/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# Note: usort wants to put Frame and FrameBatch after decoders and samplers,
1010
# but that results in circular import.
1111
from ._frame import AudioSamples, Frame, FrameBatch # usort:skip # noqa
12-
from . import decoders, encoders, samplers # noqa
12+
from . import decoders, encoders, samplers, transforms # noqa
1313

1414
try:
1515
# Note that version.py is generated during install.

src/torchcodec/_core/Encoder.cpp

Lines changed: 101 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -570,10 +570,10 @@ AVPixelFormat validatePixelFormat(
570570
TORCH_CHECK(false, errorMsg.str());
571571
}
572572

573-
void validateDoubleOption(
573+
void tryToValidateCodecOption(
574574
const AVCodec& avCodec,
575575
const char* optionName,
576-
double value) {
576+
const std::string& value) {
577577
if (!avCodec.priv_class) {
578578
return;
579579
}
@@ -586,24 +586,60 @@ void validateDoubleOption(
586586
0,
587587
AV_OPT_SEARCH_FAKE_OBJ,
588588
nullptr);
589-
// If the option was not found, let FFmpeg handle it later
589+
// If option is not found we cannot validate it, let FFmpeg handle it
590590
if (!option) {
591591
return;
592592
}
593+
// Validate if option is defined as a numeric type
593594
if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 ||
594595
option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) {
595-
TORCH_CHECK(
596-
value >= option->min && value <= option->max,
597-
optionName,
598-
"=",
599-
value,
600-
" is out of valid range [",
601-
option->min,
602-
", ",
603-
option->max,
604-
"] for this codec. For more details, run 'ffmpeg -h encoder=",
605-
avCodec.name,
606-
"'");
596+
try {
597+
double numericValue = std::stod(value);
598+
TORCH_CHECK(
599+
numericValue >= option->min && numericValue <= option->max,
600+
optionName,
601+
"=",
602+
numericValue,
603+
" is out of valid range [",
604+
option->min,
605+
", ",
606+
option->max,
607+
"] for this codec. For more details, run 'ffmpeg -h encoder=",
608+
avCodec.name,
609+
"'");
610+
} catch (const std::invalid_argument& e) {
611+
TORCH_CHECK(
612+
false,
613+
"Option ",
614+
optionName,
615+
" expects a numeric value but got '",
616+
value,
617+
"'");
618+
}
619+
}
620+
}
621+
622+
void sortCodecOptions(
623+
const std::map<std::string, std::string>& extraOptions,
624+
AVDictionary** codecDict,
625+
AVDictionary** formatDict) {
626+
// Accepts a map of options as input, then sorts them into codec options and
627+
// format options. The sorted options are returned into two separate dicts.
628+
const AVClass* formatClass = avformat_get_class();
629+
for (const auto& [key, value] : extraOptions) {
630+
const AVOption* fmtOpt = av_opt_find2(
631+
&formatClass,
632+
key.c_str(),
633+
nullptr,
634+
0,
635+
AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ,
636+
nullptr);
637+
if (fmtOpt) {
638+
av_dict_set(formatDict, key.c_str(), value.c_str(), 0);
639+
} else {
640+
// Default to codec option (includes AVCodecContext + encoder-private)
641+
av_dict_set(codecDict, key.c_str(), value.c_str(), 0);
642+
}
607643
}
608644
}
609645
} // namespace
@@ -621,6 +657,7 @@ VideoEncoder::~VideoEncoder() {
621657
avFormatContext_->pb = nullptr;
622658
}
623659
}
660+
av_dict_free(&avFormatOptions_);
624661
}
625662

626663
VideoEncoder::VideoEncoder(
@@ -687,9 +724,33 @@ VideoEncoder::VideoEncoder(
687724

688725
void VideoEncoder::initializeEncoder(
689726
const VideoStreamOptions& videoStreamOptions) {
690-
const AVCodec* avCodec =
691-
avcodec_find_encoder(avFormatContext_->oformat->video_codec);
692-
TORCH_CHECK(avCodec != nullptr, "Video codec not found");
727+
const AVCodec* avCodec = nullptr;
728+
// If codec arg is provided, find codec using logic similar to FFmpeg:
729+
// https://github.com/FFmpeg/FFmpeg/blob/master/fftools/ffmpeg_opt.c#L804-L835
730+
if (videoStreamOptions.codec.has_value()) {
731+
const std::string& codec = videoStreamOptions.codec.value();
732+
// Try to find codec by name ("libx264", "libsvtav1")
733+
avCodec = avcodec_find_encoder_by_name(codec.c_str());
734+
// Try to find by codec descriptor ("h264", "av1")
735+
if (!avCodec) {
736+
const AVCodecDescriptor* desc =
737+
avcodec_descriptor_get_by_name(codec.c_str());
738+
if (desc) {
739+
avCodec = avcodec_find_encoder(desc->id);
740+
}
741+
}
742+
TORCH_CHECK(
743+
avCodec != nullptr,
744+
"Video codec ",
745+
codec,
746+
" not found. To see available codecs, run: ffmpeg -encoders");
747+
} else {
748+
TORCH_CHECK(
749+
avFormatContext_->oformat != nullptr,
750+
"Output format is null, unable to find default codec.");
751+
avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec);
752+
TORCH_CHECK(avCodec != nullptr, "Video codec not found");
753+
}
693754

694755
AVCodecContext* avCodecContext = avcodec_alloc_context3(avCodec);
695756
TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context.");
@@ -736,17 +797,31 @@ void VideoEncoder::initializeEncoder(
736797
}
737798

738799
// Apply videoStreamOptions
739-
AVDictionary* options = nullptr;
800+
AVDictionary* avCodecOptions = nullptr;
801+
if (videoStreamOptions.extraOptions.has_value()) {
802+
for (const auto& [key, value] : videoStreamOptions.extraOptions.value()) {
803+
tryToValidateCodecOption(*avCodec, key.c_str(), value);
804+
}
805+
sortCodecOptions(
806+
videoStreamOptions.extraOptions.value(),
807+
&avCodecOptions,
808+
&avFormatOptions_);
809+
}
810+
740811
if (videoStreamOptions.crf.has_value()) {
741-
validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value());
812+
std::string crfValue = std::to_string(videoStreamOptions.crf.value());
813+
tryToValidateCodecOption(*avCodec, "crf", crfValue);
814+
av_dict_set(&avCodecOptions, "crf", crfValue.c_str(), 0);
815+
}
816+
if (videoStreamOptions.preset.has_value()) {
742817
av_dict_set(
743-
&options,
744-
"crf",
745-
std::to_string(videoStreamOptions.crf.value()).c_str(),
818+
&avCodecOptions,
819+
"preset",
820+
videoStreamOptions.preset.value().c_str(),
746821
0);
747822
}
748-
int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
749-
av_dict_free(&options);
823+
int status = avcodec_open2(avCodecContext_.get(), avCodec, &avCodecOptions);
824+
av_dict_free(&avCodecOptions);
750825

751826
TORCH_CHECK(
752827
status == AVSUCCESS,
@@ -771,7 +846,7 @@ void VideoEncoder::encode() {
771846
TORCH_CHECK(!encodeWasCalled_, "Cannot call encode() twice.");
772847
encodeWasCalled_ = true;
773848

774-
int status = avformat_write_header(avFormatContext_.get(), nullptr);
849+
int status = avformat_write_header(avFormatContext_.get(), &avFormatOptions_);
775850
TORCH_CHECK(
776851
status == AVSUCCESS,
777852
"Error in avformat_write_header: ",

0 commit comments

Comments
 (0)