meta-pytorch
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/docs/source/api_ref_transforms.rst b/docs/source/api_ref_transforms.rst
Lines changed: 17 additions & 0 deletions
diff --git a/docs/source/conf.py b/docs/source/conf.py
Lines changed: 1 addition & 0 deletions
diff --git a/docs/source/index.rst b/docs/source/index.rst
Lines changed: 1 addition & 0 deletions
diff --git a/mypy.ini b/mypy.ini
Lines changed: 1 addition & 0 deletions
diff --git a/src/torchcodec/__init__.py b/src/torchcodec/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
Lines changed: 43 additions & 0 deletions
diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h
Lines changed: 2 additions & 4 deletions
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
Lines changed: 15 additions & 9 deletions
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
Lines changed: 10 additions & 4 deletions
@@ -62,7 +62,7 @@ jobs:
         run: python -m pip install --upgrade pip
       - name: Install dependencies and FFmpeg
         run: |
-          python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
+          python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
           conda install "ffmpeg=7.0.1" pkg-config pybind11 -c conda-forge
           ffmpeg -version
       - name: Build and install torchcodec
 
@@ -0,0 +1,17 @@
+.. _transforms:
+
+=====================
+torchcodec.transforms
+=====================
+
+.. currentmodule:: torchcodec.transforms
+
+For a tutorial, see: TODO_DECODER_TRANSFORMS_TUTORIAL.
+
+.. autosummary::
+    :toctree: generated/
+    :nosignatures:
+    :template: dataclass.rst
+
+    DecoderTransform
+    Resize
@@ -209,6 +209,7 @@ def __call__(self, filename):
 intersphinx_mapping = {
     "python": ("https://docs.python.org/3/", None),
     "torch": ("https://pytorch.org/docs/stable/", None),
+    "torchvision": ("https://docs.pytorch.org/vision/stable/", None),
     "numpy": ("https://numpy.org/doc/stable/", None),
     "PIL": ("https://pillow.readthedocs.io/en/stable/", None),
     "matplotlib": ("https://matplotlib.org/stable/", None),
 
@@ -125,3 +125,4 @@ Encoding
    api_ref_decoders
    api_ref_encoders
    api_ref_samplers
+   api_ref_transforms
@@ -4,3 +4,4 @@ files = src/torchcodec
 show_error_codes = True
 pretty = True
 allow_redefinition = True
+follow_untyped_imports = True
@@ -9,7 +9,7 @@
 # Note: usort wants to put Frame and FrameBatch after decoders and samplers,
 # but that results in circular import.
 from ._frame import AudioSamples, Frame, FrameBatch  # usort:skip # noqa
-from . import decoders, encoders, samplers  # noqa
+from . import decoders, encoders, samplers, transforms  # noqa
 
 try:
     # Note that version.py is generated during install.
 
@@ -5,6 +5,7 @@
 #include "torch/types.h"
 
 extern "C" {
+#include <libavutil/opt.h>
 #include <libavutil/pixdesc.h>
 }
 
@@ -568,6 +569,43 @@ AVPixelFormat validatePixelFormat(
   }
   TORCH_CHECK(false, errorMsg.str());
 }
+
+// Raises via TORCH_CHECK when `value` lies outside the [min, max] range that
+// `avCodec` declares for its private option `optionName`. Codecs without a
+// private class, unknown options and non-numeric options are not checked.
+void validateDoubleOption(
+    const AVCodec& avCodec,
+    const char* optionName,
+    double value) {
+  if (avCodec.priv_class == nullptr) {
+    return;
+  }
+  // av_opt_find2 takes a non-const void*; with AV_OPT_SEARCH_FAKE_OBJ we hand
+  // it the address of the codec's AVClass pointer.
+  void* fakeObj =
+      const_cast<void*>(static_cast<const void*>(&avCodec.priv_class));
+  const AVOption* found = av_opt_find2(
+      fakeObj, optionName, nullptr, 0, AV_OPT_SEARCH_FAKE_OBJ, nullptr);
+  // Options unknown here are left for FFmpeg to handle later; options that are
+  // not numeric are skipped as well.
+  if (found == nullptr ||
+      (found->type != AV_OPT_TYPE_INT && found->type != AV_OPT_TYPE_INT64 &&
+       found->type != AV_OPT_TYPE_FLOAT && found->type != AV_OPT_TYPE_DOUBLE)) {
+    return;
+  }
+  TORCH_CHECK(
+      value >= found->min && value <= found->max,
+      optionName,
+      "=",
+      value,
+      " is out of valid range [",
+      found->min,
+      ", ",
+      found->max,
+      "] for this codec. For more details, run 'ffmpeg -h encoder=",
+      avCodec.name,
+      "'");
+}
 } // namespace
 
 VideoEncoder::~VideoEncoder() {
@@ -700,12 +738,17 @@ void VideoEncoder::initializeEncoder(
   // Apply videoStreamOptions
   AVDictionary* options = nullptr;
   if (videoStreamOptions.crf.has_value()) {
+    validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value());
     av_dict_set(
         &options,
         "crf",
         std::to_string(videoStreamOptions.crf.value()).c_str(),
         0);
   }
+  if (videoStreamOptions.preset.has_value()) {
+    av_dict_set(
+        &options, "preset", videoStreamOptions.preset.value().c_str(), 0);
+  }
   int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
   av_dict_free(&options);
 
 
@@ -45,13 +45,11 @@ struct VideoStreamOptions {
   std::string_view deviceVariant = "ffmpeg";
 
   // Encoding options
-  // TODO-VideoEncoder: Consider adding other optional fields here
-  // (bit rate, gop size, max b frames, preset)
-  std::optional<int> crf;
-
   // Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p")
   // If not specified, uses codec's default format.
   std::optional<std::string> pixelFormat;
+  std::optional<double> crf;
+  std::optional<std::string> preset;
 };
 
 struct AudioStreamOptions {
 
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
   m.def(
-      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, int? crf=None) -> ()");
+      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
   m.def(
-      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, int? crf=None) -> Tensor");
+      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None) -> Tensor");
   m.def(
-      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, int? crf=None) -> ()");
+      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
   m.def(
@@ -603,11 +603,13 @@ void encode_video_to_file(
     const at::Tensor& frames,
     int64_t frame_rate,
     std::string_view file_name,
-    std::optional<std::string> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt) {
+    std::optional<std::string_view> pixel_format = std::nullopt,
+    std::optional<double> crf = std::nullopt,
+    std::optional<std::string_view> preset = std::nullopt) {
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.pixelFormat = pixel_format;
   videoStreamOptions.crf = crf;
+  videoStreamOptions.preset = preset;
   VideoEncoder(
       frames,
       validateInt64ToInt(frame_rate, "frame_rate"),
@@ -620,12 +622,14 @@ at::Tensor encode_video_to_tensor(
     const at::Tensor& frames,
     int64_t frame_rate,
     std::string_view format,
-    std::optional<std::string> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt) {
+    std::optional<std::string_view> pixel_format = std::nullopt,
+    std::optional<double> crf = std::nullopt,
+    std::optional<std::string_view> preset = std::nullopt) {
   auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.pixelFormat = pixel_format;
   videoStreamOptions.crf = crf;
+  videoStreamOptions.preset = preset;
   return VideoEncoder(
              frames,
              validateInt64ToInt(frame_rate, "frame_rate"),
@@ -640,8 +644,9 @@ void _encode_video_to_file_like(
     int64_t frame_rate,
     std::string_view format,
     int64_t file_like_context,
-    std::optional<std::string> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt) {
+    std::optional<std::string_view> pixel_format = std::nullopt,
+    std::optional<double> crf = std::nullopt,
+    std::optional<std::string_view> preset = std::nullopt) {
   auto fileLikeContext =
       reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
   TORCH_CHECK(
@@ -651,6 +656,7 @@ void _encode_video_to_file_like(
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.pixelFormat = pixel_format;
   videoStreamOptions.crf = crf;
+  videoStreamOptions.preset = preset;
 
   VideoEncoder encoder(
       frames,
 
@@ -213,8 +213,9 @@ def encode_video_to_file_like(
     frame_rate: int,
     format: str,
     file_like: Union[io.RawIOBase, io.BufferedIOBase],
-    crf: Optional[int] = None,
+    crf: Optional[Union[int, float]] = None,
     pixel_format: Optional[str] = None,
+    preset: Optional[str] = None,
 ) -> None:
     """Encode video frames to a file-like object.
 
@@ -225,6 +226,7 @@ def encode_video_to_file_like(
         file_like: File-like object that supports write() and seek() methods
         crf: Optional constant rate factor for encoding quality
         pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p")
+        preset: Optional encoder preset as string (e.g., "ultrafast", "medium")
     """
     assert _pybind_ops is not None
 
@@ -235,6 +237,7 @@ def encode_video_to_file_like(
         _pybind_ops.create_file_like_context(file_like, True),  # True means for writing
         pixel_format,
         crf,
+        preset,
     )
 
 
@@ -322,8 +325,9 @@ def encode_video_to_file_abstract(
     frames: torch.Tensor,
     frame_rate: int,
     filename: str,
-    crf: Optional[int] = None,
     pixel_format: Optional[str] = None,
+    crf: Optional[Union[int, float]] = None,
+    preset: Optional[str] = None,
 ) -> None:
     return
 
@@ -333,8 +337,9 @@ def encode_video_to_tensor_abstract(
     frames: torch.Tensor,
     frame_rate: int,
     format: str,
-    crf: Optional[int] = None,
     pixel_format: Optional[str] = None,
+    crf: Optional[Union[int, float]] = None,
+    preset: Optional[str] = None,
 ) -> torch.Tensor:
     return torch.empty([], dtype=torch.long)
 
@@ -345,8 +350,9 @@ def _encode_video_to_file_like_abstract(
     frame_rate: int,
     format: str,
     file_like_context: int,
-    crf: Optional[int] = None,
     pixel_format: Optional[str] = None,
+    crf: Optional[Union[int, float]] = None,
+    preset: Optional[str] = None,
 ) -> None:
     return