meta-pytorch · Dan-Flores · Nov 13, 2025 · Nov 7, 2025 · Nov 7, 2025 · Nov 10, 2025
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
@@ -5,6 +5,7 @@
 #include "torch/types.h"
 
 extern "C" {
+#include <libavutil/opt.h>
 #include <libavutil/pixdesc.h>
 }
 
@@ -568,6 +569,43 @@ AVPixelFormat validatePixelFormat(
   }
   TORCH_CHECK(false, errorMsg.str());
 }
+
+void validateDoubleOption(
+    const AVCodec& avCodec,
+    const char* optionName,
+    double value) {
+  if (!avCodec.priv_class) {
+    return; // No priv_class to look the option up in: skip validation
+  }
+  const AVOption* option = av_opt_find2(
+      // With AV_OPT_SEARCH_FAKE_OBJ, obj points at the AVClass* (no instance).
+      // static_cast to const void* first, then const_cast drops the const.
+      const_cast<void*>(static_cast<const void*>(&avCodec.priv_class)),
+      optionName,
+      nullptr, // unit
+      0, // opt_flags
+      AV_OPT_SEARCH_FAKE_OBJ,
+      nullptr); // target_obj
+  // Unknown option for this codec: skip the check, FFmpeg handles it later
+  if (!option) {
+    return;
+  }
+  if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 ||
+      option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) {
+    TORCH_CHECK(
+        value >= option->min && value <= option->max,
+        optionName,
+        "=",
+        value,
+        " is out of valid range [",
+        option->min,
+        ", ",
+        option->max,
+        "] for this codec. For more details, run 'ffmpeg -h encoder=",
+        avCodec.name,
+        "'");
+  }
+}
 } // namespace
 
 VideoEncoder::~VideoEncoder() {
@@ -700,12 +738,17 @@ void VideoEncoder::initializeEncoder(
   // Apply videoStreamOptions
   AVDictionary* options = nullptr;
   if (videoStreamOptions.crf.has_value()) {
+    validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value());
     av_dict_set(
         &options,
         "crf",
         std::to_string(videoStreamOptions.crf.value()).c_str(),
         0);
   }
+  if (videoStreamOptions.preset.has_value()) {
+    av_dict_set(
+        &options, "preset", videoStreamOptions.preset.value().c_str(), 0);
+  }
   int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
   av_dict_free(&options);
 

diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h
@@ -45,13 +45,11 @@ struct VideoStreamOptions {
   std::string_view deviceVariant = "ffmpeg";
 
   // Encoding options
-  // TODO-VideoEncoder: Consider adding other optional fields here
-  // (bit rate, gop size, max b frames, preset)
-  std::optional<int> crf;
-
   // Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p")
   // If not specified, uses codec's default format.
   std::optional<std::string> pixelFormat;
+  std::optional<double> crf;
+  std::optional<std::string> preset;
 };
 
 struct AudioStreamOptions {

diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
   m.def(
-      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, int? crf=None) -> ()");
+      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
   m.def(
-      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, int? crf=None) -> Tensor");
+      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None) -> Tensor");
   m.def(
-      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, int? crf=None) -> ()");
+      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
   m.def(
@@ -603,11 +603,13 @@ void encode_video_to_file(
     const at::Tensor& frames,
     int64_t frame_rate,
     std::string_view file_name,
-    std::optional<std::string> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt) {
+    std::optional<std::string_view> pixel_format = std::nullopt,
+    std::optional<double> crf = std::nullopt,
+    std::optional<std::string_view> preset = std::nullopt) {
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.pixelFormat = pixel_format;
   videoStreamOptions.crf = crf;
+  videoStreamOptions.preset = preset;
   VideoEncoder(
       frames,
       validateInt64ToInt(frame_rate, "frame_rate"),
@@ -620,12 +622,14 @@ at::Tensor encode_video_to_tensor(
     const at::Tensor& frames,
     int64_t frame_rate,
     std::string_view format,
-    std::optional<std::string> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt) {
+    std::optional<std::string_view> pixel_format = std::nullopt,
+    std::optional<double> crf = std::nullopt,
+    std::optional<std::string_view> preset = std::nullopt) {
   auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.pixelFormat = pixel_format;
   videoStreamOptions.crf = crf;
+  videoStreamOptions.preset = preset;
   return VideoEncoder(
              frames,
              validateInt64ToInt(frame_rate, "frame_rate"),
@@ -640,8 +644,9 @@ void _encode_video_to_file_like(
     int64_t frame_rate,
     std::string_view format,
     int64_t file_like_context,
-    std::optional<std::string> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt) {
+    std::optional<std::string_view> pixel_format = std::nullopt,
+    std::optional<double> crf = std::nullopt,
+    std::optional<std::string_view> preset = std::nullopt) {
   auto fileLikeContext =
       reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
   TORCH_CHECK(
@@ -651,6 +656,7 @@ void _encode_video_to_file_like(
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.pixelFormat = pixel_format;
   videoStreamOptions.crf = crf;
+  videoStreamOptions.preset = preset;
 
   VideoEncoder encoder(
       frames,

diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
@@ -213,8 +213,9 @@ def encode_video_to_file_like(
     frame_rate: int,
     format: str,
     file_like: Union[io.RawIOBase, io.BufferedIOBase],
-    crf: Optional[int] = None,
+    crf: Optional[Union[int, float]] = None,
     pixel_format: Optional[str] = None,
+    preset: Optional[str] = None,
 ) -> None:
     """Encode video frames to a file-like object.
 
@@ -225,6 +226,7 @@ def encode_video_to_file_like(
         file_like: File-like object that supports write() and seek() methods
         crf: Optional constant rate factor for encoding quality
         pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p")
+        preset: Optional encoder preset as string (e.g., "ultrafast", "medium")
     """
     assert _pybind_ops is not None
 
@@ -235,6 +237,7 @@ def encode_video_to_file_like(
         _pybind_ops.create_file_like_context(file_like, True),  # True means for writing
         pixel_format,
         crf,
+        preset,
     )
 
 
@@ -322,8 +325,9 @@ def encode_video_to_file_abstract(
     frames: torch.Tensor,
     frame_rate: int,
     filename: str,
-    crf: Optional[int] = None,
     pixel_format: Optional[str] = None,
+    crf: Optional[Union[int, float]] = None,
+    preset: Optional[str] = None,
 ) -> None:
     return
 
@@ -333,8 +337,9 @@ def encode_video_to_tensor_abstract(
     frames: torch.Tensor,
     frame_rate: int,
     format: str,
-    crf: Optional[int] = None,
     pixel_format: Optional[str] = None,
+    crf: Optional[Union[int, float]] = None,
+    preset: Optional[str] = None,
 ) -> torch.Tensor:
     return torch.empty([], dtype=torch.long)
 
@@ -345,8 +350,9 @@ def _encode_video_to_file_like_abstract(
     frame_rate: int,
     format: str,
     file_like_context: int,
-    crf: Optional[int] = None,
     pixel_format: Optional[str] = None,
+    crf: Optional[Union[int, float]] = None,
+    preset: Optional[str] = None,
 ) -> None:
     return
 

diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
@@ -37,6 +37,8 @@ def to_file(
         dest: Union[str, Path],
         *,
         pixel_format: Optional[str] = None,
+        crf: Optional[Union[int, float]] = None,
+        preset: Optional[Union[str, int]] = None,
     ) -> None:
         """Encode frames into a file.
 
@@ -46,36 +48,58 @@ def to_file(
                 container format.
             pixel_format (str, optional): The pixel format for encoding (e.g.,
                 "yuv420p", "yuv444p"). If not specified, uses codec's default format.
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
+                mean better quality. Valid range depends on the encoder (commonly 0-51).
+                Defaults to None (which will use encoder's default).
+            preset (str or int, optional): Encoder option that controls the tradeoff between
+                encoding speed and compression. Valid values depend on the encoder (commonly
+                a string: "fast", "medium", "slow"). Defaults to None
+                (which will use encoder's default).
         """
+        preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file(
             frames=self._frames,
             frame_rate=self._frame_rate,
             filename=str(dest),
             pixel_format=pixel_format,
+            crf=crf,
+            preset=preset,
         )
 
     def to_tensor(
         self,
         format: str,
         *,
         pixel_format: Optional[str] = None,
+        crf: Optional[Union[int, float]] = None,
+        preset: Optional[Union[str, int]] = None,
     ) -> Tensor:
         """Encode frames into raw bytes, as a 1D uint8 Tensor.
 
         Args:
             format (str): The container format of the encoded frames, e.g. "mp4", "mov",
-            "mkv", "avi", "webm", "flv", or "gif"
+                "mkv", "avi", "webm", "flv", etc.
             pixel_format (str, optional): The pixel format to encode frames into (e.g.,
                 "yuv420p", "yuv444p"). If not specified, uses codec's default format.
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
+                mean better quality. Valid range depends on the encoder (commonly 0-51).
+                Defaults to None (which will use encoder's default).
+            preset (str or int, optional): Encoder option that controls the tradeoff between
+                encoding speed and compression. Valid values depend on the encoder (commonly
+                a string: "fast", "medium", "slow"). Defaults to None
+                (which will use encoder's default).
 
         Returns:
-            Tensor: The raw encoded bytes as 4D uint8 Tensor.
+            Tensor: The raw encoded bytes as a 1D uint8 Tensor.
         """
+        preset_value = str(preset) if isinstance(preset, int) else preset
         return _core.encode_video_to_tensor(
             frames=self._frames,
             frame_rate=self._frame_rate,
             format=format,
             pixel_format=pixel_format,
+            crf=crf,
+            preset=preset_value,
         )
 
     def to_file_like(
@@ -84,6 +108,8 @@ def to_file_like(
         format: str,
         *,
         pixel_format: Optional[str] = None,
+        crf: Optional[Union[int, float]] = None,
+        preset: Optional[Union[str, int]] = None,
     ) -> None:
         """Encode frames into a file-like object.
 
@@ -94,14 +120,24 @@ def to_file_like(
                 ``write(data: bytes) -> int`` and ``seek(offset: int, whence:
                 int = 0) -> int``.
             format (str): The container format of the encoded frames, e.g. "mp4", "mov",
-                "mkv", "avi", "webm", "flv", or "gif".
+                "mkv", "avi", "webm", "flv", etc.
             pixel_format (str, optional): The pixel format for encoding (e.g.,
                 "yuv420p", "yuv444p"). If not specified, uses codec's default format.
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
+                mean better quality. Valid range depends on the encoder (commonly 0-51).
+                Defaults to None (which will use encoder's default).
+            preset (str or int, optional): Encoder option that controls the tradeoff between
+                encoding speed and compression. Valid values depend on the encoder (commonly
+                a string: "fast", "medium", "slow"). Defaults to None
+                (which will use encoder's default).
         """
+        preset = str(preset) if isinstance(preset, int) else preset
         _core.encode_video_to_file_like(
             frames=self._frames,
             frame_rate=self._frame_rate,
             format=format,
             file_like=file_like,
             pixel_format=pixel_format,
+            crf=crf,
+            preset=preset,
         )