meta-pytorch
diff --git a/‎.github/workflows/docs.yaml‎
Lines changed: 1 addition & 0 deletions b/‎.github/workflows/docs.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/lint.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/lint.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/linux_cuda_wheel.yaml‎
Lines changed: 2 additions & 1 deletion b/‎.github/workflows/linux_cuda_wheel.yaml‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 3 additions & 3 deletions b/‎README.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎docs/source/api_ref_transforms.rst‎
Lines changed: 17 additions & 0 deletions b/‎docs/source/api_ref_transforms.rst‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎docs/source/conf.py‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/conf.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/index.rst‎
Lines changed: 1 addition & 0 deletions b/‎docs/source/index.rst‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎mypy.ini‎
Lines changed: 1 addition & 0 deletions b/‎mypy.ini‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/torchcodec/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎src/torchcodec/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/torchcodec/_core/Encoder.cpp‎
Lines changed: 101 additions & 26 deletions b/‎src/torchcodec/_core/Encoder.cpp‎
Lines changed: 101 additions & 26 deletions
@@ -84,6 +84,7 @@ jobs:
           ${CONDA_RUN} conda info
           ${CONDA_RUN} nvidia-smi
           ${CONDA_RUN} conda list
+          echo LD_LIBRARY_PATH=$CONDA_PREFIX/lib:/usr/local/cuda/lib64/:${LD_LIBRARY_PATH} >> $GITHUB_ENV
       - name: Assert ffmpeg exists
         run: |
           ${CONDA_RUN} ffmpeg -buildconf
 
@@ -62,7 +62,7 @@ jobs:
         run: python -m pip install --upgrade pip
       - name: Install dependencies and FFmpeg
         run: |
-          python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
+          python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
           conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
 
@@ -95,12 +95,13 @@ jobs:
           # We install conda packages at the start because otherwise conda may have conflicts with dependencies.
           # Note: xorg-libxau was added to fix a problem with ffmpeg 4. We should consider removing it.
           default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }} conda-forge::xorg-libxau"
-      - name: Check env
+      - name: Check env, set LD_LIBRARY_PATH
         run: |
           ${CONDA_RUN} env
           ${CONDA_RUN} conda info
           ${CONDA_RUN} nvidia-smi
           ${CONDA_RUN} conda list
+          echo LD_LIBRARY_PATH=$CONDA_PREFIX/lib:/usr/local/cuda/lib64/:${LD_LIBRARY_PATH} >> $GITHUB_ENV
       - name: Assert ffmpeg exists
         run: |
           ${CONDA_RUN} ffmpeg -buildconf
 
@@ -1,4 +1,4 @@
-[**Installation**](#installing-torchcodec) | [**Simple Example**](#using-torchcodec) | [**Detailed Example**](https://pytorch.org/torchcodec/stable/generated_examples/) | [**Documentation**](https://pytorch.org/torchcodec) | [**Contributing**](CONTRIBUTING.md) | [**License**](#license)
+[**Installation**](#installing-torchcodec) | [**Simple Example**](#using-torchcodec) | [**Detailed Example**](https://meta-pytorch.org/torchcodec/stable/generated_examples/) | [**Documentation**](https://meta-pytorch.org/torchcodec) | [**Contributing**](CONTRIBUTING.md) | [**License**](#license)
 
 # TorchCodec
 
@@ -23,7 +23,7 @@ We achieve these capabilities through:
 
 Here's a condensed summary of what you can do with TorchCodec. For more detailed
 examples, [check out our
-documentation](https://pytorch.org/torchcodec/stable/generated_examples/)!
+documentation](https://meta-pytorch.org/torchcodec/stable/generated_examples/)!
 
 #### Decoding
 
@@ -219,7 +219,7 @@ The bottom row is [promotional video from NASA](https://download.pytorch.org/tor
 that has a resolution of 960x540 at 29.7 fps and is 206 seconds long. Both videos were
 encoded with libx264 and yuv420p pixel format. All decoders, except for TorchVision, used FFmpeg 6.1.2. TorchVision used FFmpeg 4.2.2.
 
-For TorchCodec, the "approx" label means that it was using [approximate mode](https://pytorch.org/torchcodec/stable/generated_examples/approximate_mode.html)
+For TorchCodec, the "approx" label means that it was using [approximate mode](https://meta-pytorch.org/torchcodec/stable/generated_examples/decoding/approximate_mode.html)
 for seeking.
 
 ## Contributing
 
@@ -0,0 +1,17 @@
+.. _transforms:
+
+=====================
+torchcodec.transforms
+=====================
+
+.. currentmodule:: torchcodec.transforms
+
+For a tutorial, see: TODO_DECODER_TRANSFORMS_TUTORIAL.
+
+.. autosummary::
+    :toctree: generated/
+    :nosignatures:
+    :template: dataclass.rst
+
+    DecoderTransform
+    Resize
@@ -209,6 +209,7 @@ def __call__(self, filename):
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3/", None),
     "torch": ("https://pytorch.org/docs/stable/", None),
+    "torchvision": ("https://docs.pytorch.org/vision/stable/", None),
     "numpy": ("https://numpy.org/doc/stable/", None),
     "PIL": ("https://pillow.readthedocs.io/en/stable/", None),
     "matplotlib": ("https://matplotlib.org/stable/", None),
 
@@ -125,3 +125,4 @@ Encoding
    api_ref_decoders
    api_ref_encoders
    api_ref_samplers
+   api_ref_transforms
@@ -4,3 +4,4 @@ files = src/torchcodec
 show_error_codes = True
 pretty = True
 allow_redefinition = True
+follow_untyped_imports = True
@@ -9,7 +9,7 @@
 # Note: usort wants to put Frame and FrameBatch after decoders and samplers,
 # but that results in circular import.
 from ._frame import AudioSamples, Frame, FrameBatch  # usort:skip # noqa
-from . import decoders, encoders, samplers  # noqa
+from . import decoders, encoders, samplers, transforms  # noqa
 
 try:
     # Note that version.py is generated during install.
 
@@ -570,10 +570,10 @@ AVPixelFormat validatePixelFormat(
   TORCH_CHECK(false, errorMsg.str());
 }
 
-void validateDoubleOption(
+void tryToValidateCodecOption(
     const AVCodec& avCodec,
     const char* optionName,
-    double value) {
+    const std::string& value) {
   if (!avCodec.priv_class) {
     return;
   }
@@ -586,24 +586,60 @@ void validateDoubleOption(
       0,
       AV_OPT_SEARCH_FAKE_OBJ,
       nullptr);
-  // If the option was not found, let FFmpeg handle it later
+  // If option is not found we cannot validate it, let FFmpeg handle it
   if (!option) {
     return;
   }
+  // Validate if option is defined as a numeric type
   if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 ||
       option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) {
-    TORCH_CHECK(
-        value >= option->min && value <= option->max,
-        optionName,
-        "=",
-        value,
-        " is out of valid range [",
-        option->min,
-        ", ",
-        option->max,
-        "] for this codec. For more details, run 'ffmpeg -h encoder=",
-        avCodec.name,
-        "'");
+    try {
+      double numericValue = std::stod(value);
+      TORCH_CHECK(
+          numericValue >= option->min && numericValue <= option->max,
+          optionName,
+          "=",
+          numericValue,
+          " is out of valid range [",
+          option->min,
+          ", ",
+          option->max,
+          "] for this codec. For more details, run 'ffmpeg -h encoder=",
+          avCodec.name,
+          "'");
+    } catch (const std::invalid_argument& e) {
+      TORCH_CHECK(
+          false,
+          "Option ",
+          optionName,
+          " expects a numeric value but got '",
+          value,
+          "'");
+    }
+  }
+}
+
+void sortCodecOptions(
+    const std::map<std::string, std::string>& extraOptions,
+    AVDictionary** codecDict,
+    AVDictionary** formatDict) {
+  // Accepts a map of options as input, then sorts them into codec options and
+  // format options. The sorted options are returned into two separate dicts.
+  const AVClass* formatClass = avformat_get_class();
+  for (const auto& [key, value] : extraOptions) {
+    const AVOption* fmtOpt = av_opt_find2(
+        &formatClass,
+        key.c_str(),
+        nullptr,
+        0,
+        AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ,
+        nullptr);
+    if (fmtOpt) {
+      av_dict_set(formatDict, key.c_str(), value.c_str(), 0);
+    } else {
+      // Default to codec option (includes AVCodecContext + encoder-private)
+      av_dict_set(codecDict, key.c_str(), value.c_str(), 0);
+    }
   }
 }
 } // namespace
@@ -621,6 +657,7 @@ VideoEncoder::~VideoEncoder() {
       avFormatContext_->pb = nullptr;
     }
   }
+  av_dict_free(&avFormatOptions_);
 }
 
 VideoEncoder::VideoEncoder(
@@ -687,9 +724,33 @@ VideoEncoder::VideoEncoder(
 
 void VideoEncoder::initializeEncoder(
     const VideoStreamOptions& videoStreamOptions) {
-  const AVCodec* avCodec =
-      avcodec_find_encoder(avFormatContext_->oformat->video_codec);
-  TORCH_CHECK(avCodec != nullptr, "Video codec not found");
+  const AVCodec* avCodec = nullptr;
+  // If codec arg is provided, find codec using logic similar to FFmpeg:
+  // https://github.com/FFmpeg/FFmpeg/blob/master/fftools/ffmpeg_opt.c#L804-L835
+  if (videoStreamOptions.codec.has_value()) {
+    const std::string& codec = videoStreamOptions.codec.value();
+    // Try to find codec by name ("libx264", "libsvtav1")
+    avCodec = avcodec_find_encoder_by_name(codec.c_str());
+    // Try to find by codec descriptor ("h264", "av1")
+    if (!avCodec) {
+      const AVCodecDescriptor* desc =
+          avcodec_descriptor_get_by_name(codec.c_str());
+      if (desc) {
+        avCodec = avcodec_find_encoder(desc->id);
+      }
+    }
+    TORCH_CHECK(
+        avCodec != nullptr,
+        "Video codec ",
+        codec,
+        " not found. To see available codecs, run: ffmpeg -encoders");
+  } else {
+    TORCH_CHECK(
+        avFormatContext_->oformat != nullptr,
+        "Output format is null, unable to find default codec.");
+    avCodec = avcodec_find_encoder(avFormatContext_->oformat->video_codec);
+    TORCH_CHECK(avCodec != nullptr, "Video codec not found");
+  }
 
   AVCodecContext* avCodecContext = avcodec_alloc_context3(avCodec);
   TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context.");
@@ -736,17 +797,31 @@ void VideoEncoder::initializeEncoder(
   }
 
   // Apply videoStreamOptions
-  AVDictionary* options = nullptr;
+  AVDictionary* avCodecOptions = nullptr;
+  if (videoStreamOptions.extraOptions.has_value()) {
+    for (const auto& [key, value] : videoStreamOptions.extraOptions.value()) {
+      tryToValidateCodecOption(*avCodec, key.c_str(), value);
+    }
+    sortCodecOptions(
+        videoStreamOptions.extraOptions.value(),
+        &avCodecOptions,
+        &avFormatOptions_);
+  }
+
   if (videoStreamOptions.crf.has_value()) {
-    validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value());
+    std::string crfValue = std::to_string(videoStreamOptions.crf.value());
+    tryToValidateCodecOption(*avCodec, "crf", crfValue);
+    av_dict_set(&avCodecOptions, "crf", crfValue.c_str(), 0);
+  }
+  if (videoStreamOptions.preset.has_value()) {
     av_dict_set(
-        &options,
-        "crf",
-        std::to_string(videoStreamOptions.crf.value()).c_str(),
+        &avCodecOptions,
+        "preset",
+        videoStreamOptions.preset.value().c_str(),
         0);
   }
-  int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
-  av_dict_free(&options);
+  int status = avcodec_open2(avCodecContext_.get(), avCodec, &avCodecOptions);
+  av_dict_free(&avCodecOptions);
 
   TORCH_CHECK(
       status == AVSUCCESS,
@@ -771,7 +846,7 @@ void VideoEncoder::encode() {
   TORCH_CHECK(!encodeWasCalled_, "Cannot call encode() twice.");
   encodeWasCalled_ = true;
 
-  int status = avformat_write_header(avFormatContext_.get(), nullptr);
+  int status = avformat_write_header(avFormatContext_.get(), &avFormatOptions_);
   TORCH_CHECK(
       status == AVSUCCESS,
       "Error in avformat_write_header: ",