Merge branch 'crf_encode_option' of https://github.com/Dan-Flores/torchcodec into preset_encode_option

Dan-Flores · Dan-Flores · commit 682e7cb1f033 · 2025-11-12T13:15:17.000-05:00
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
@@ -5,6 +5,7 @@
 #include "torch/types.h"
 
 extern "C" {
+#include <libavutil/opt.h>
 #include <libavutil/pixdesc.h>
 }
 
@@ -568,6 +569,43 @@ AVPixelFormat validatePixelFormat(
   }
   TORCH_CHECK(false, errorMsg.str());
 }
+
+void validateDoubleOption(
+    const AVCodec& avCodec,
+    const char* optionName,
+    double value) {
+  if (!avCodec.priv_class) {
+    return; // No private option class to search; skip validation.
+  }
+  const AVOption* option = av_opt_find2(
+      // obj must be a non-const void*, but &avCodec.priv_class is a
+      // const AVClass* const*: cast to const void* first, then drop const.
+      const_cast<void*>(static_cast<const void*>(&avCodec.priv_class)),
+      optionName,
+      nullptr,
+      0,
+      AV_OPT_SEARCH_FAKE_OBJ,
+      nullptr);
+  // Unknown option: nothing to range-check here, FFmpeg handles it later.
+  if (!option) {
+    return;
+  }
+  if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 ||
+      option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) {
+    TORCH_CHECK(
+        value >= option->min && value <= option->max,
+        optionName,
+        "=",
+        value,
+        " is out of valid range [",
+        option->min,
+        ", ",
+        option->max,
+        "] for this codec. For more details, run 'ffmpeg -h encoder=",
+        avCodec.name,
+        "'");
+  }
+}
 } // namespace
 
 VideoEncoder::~VideoEncoder() {
@@ -700,6 +738,7 @@ void VideoEncoder::initializeEncoder(
   // Apply videoStreamOptions
   AVDictionary* options = nullptr;
   if (videoStreamOptions.crf.has_value()) {
+    validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value());
     av_dict_set(
         &options,
         "crf",
diff --git a/src/torchcodec/_core/custom_ops.cpp b/src/torchcodec/_core/custom_ops.cpp
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
   m.def(
       "_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
   m.def(
-      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, int? crf=None, str? preset=None) -> ()");
+      "encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
   m.def(
-      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, int? crf=None, str? preset=None) -> Tensor");
+      "encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None) -> Tensor");
   m.def(
-      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, int? crf=None, str? preset=None) -> ()");
+      "_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
   m.def(
       "create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
   m.def(
@@ -604,7 +604,7 @@ void encode_video_to_file(
     int64_t frame_rate,
     std::string_view file_name,
     std::optional<std::string_view> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt,
+    std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt) {
   VideoStreamOptions videoStreamOptions;
   videoStreamOptions.pixelFormat = pixel_format;
@@ -623,7 +623,7 @@ at::Tensor encode_video_to_tensor(
     int64_t frame_rate,
     std::string_view format,
     std::optional<std::string_view> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt,
+    std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt) {
   auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
   VideoStreamOptions videoStreamOptions;
@@ -645,7 +645,7 @@ void _encode_video_to_file_like(
     std::string_view format,
     int64_t file_like_context,
     std::optional<std::string_view> pixel_format = std::nullopt,
-    std::optional<int64_t> crf = std::nullopt,
+    std::optional<double> crf = std::nullopt,
     std::optional<std::string_view> preset = std::nullopt) {
   auto fileLikeContext =
       reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
diff --git a/src/torchcodec/_core/ops.py b/src/torchcodec/_core/ops.py
@@ -213,7 +213,7 @@ def encode_video_to_file_like(
     frame_rate: int,
     format: str,
     file_like: Union[io.RawIOBase, io.BufferedIOBase],
-    crf: Optional[int] = None,
+    crf: Optional[Union[int, float]] = None,
     pixel_format: Optional[str] = None,
     preset: Optional[str] = None,
 ) -> None:
@@ -326,7 +326,7 @@ def encode_video_to_file_abstract(
     frame_rate: int,
     filename: str,
     pixel_format: Optional[str] = None,
-    crf: Optional[int] = None,
+    crf: Optional[Union[int, float]] = None,
     preset: Optional[str] = None,
 ) -> None:
     return
@@ -338,7 +338,7 @@ def encode_video_to_tensor_abstract(
     frame_rate: int,
     format: str,
     pixel_format: Optional[str] = None,
-    crf: Optional[int] = None,
+    crf: Optional[Union[int, float]] = None,
     preset: Optional[str] = None,
 ) -> torch.Tensor:
     return torch.empty([], dtype=torch.long)
@@ -351,7 +351,7 @@ def _encode_video_to_file_like_abstract(
     format: str,
     file_like_context: int,
     pixel_format: Optional[str] = None,
-    crf: Optional[int] = None,
+    crf: Optional[Union[int, float]] = None,
     preset: Optional[str] = None,
 ) -> None:
     return
diff --git a/src/torchcodec/encoders/_video_encoder.py b/src/torchcodec/encoders/_video_encoder.py
@@ -37,7 +37,7 @@ def to_file(
         dest: Union[str, Path],
         *,
         pixel_format: Optional[str] = None,
-        crf: Optional[int] = None,
+        crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
     ) -> None:
         """Encode frames into a file.
@@ -48,7 +48,7 @@ def to_file(
                 container format.
             pixel_format (str, optional): The pixel format for encoding (e.g.,
                 "yuv420p", "yuv444p"). If not specified, uses codec's default format.
-            crf (int, optional): Constant Rate Factor for encoding quality. Lower values
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
                 mean better quality. Valid range depends on the encoder (commonly 0-51).
                 Defaults to None (which will use encoder's default).
             preset (str or int, optional): Encoder option that controls the tradeoff between
@@ -71,7 +71,7 @@ def to_tensor(
         format: str,
         *,
         pixel_format: Optional[str] = None,
-        crf: Optional[int] = None,
+        crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
     ) -> Tensor:
         """Encode frames into raw bytes, as a 1D uint8 Tensor.
@@ -81,7 +81,7 @@ def to_tensor(
                 "mkv", "avi", "webm", "flv", etc.
             pixel_format (str, optional): The pixel format to encode frames into (e.g.,
                 "yuv420p", "yuv444p"). If not specified, uses codec's default format.
-            crf (int, optional): Constant Rate Factor for encoding quality. Lower values
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
                 mean better quality. Valid range depends on the encoder (commonly 0-51).
                 Defaults to None (which will use encoder's default).
             preset (str or int, optional): Encoder option that controls the tradeoff between
@@ -109,7 +109,7 @@ def to_file_like(
         format: str,
         *,
         pixel_format: Optional[str] = None,
-        crf: Optional[int] = None,
+        crf: Optional[Union[int, float]] = None,
         preset: Optional[Union[str, int]] = None,
     ) -> None:
         """Encode frames into a file-like object.
@@ -124,7 +124,7 @@ def to_file_like(
                 "mkv", "avi", "webm", "flv", etc.
             pixel_format (str, optional): The pixel format for encoding (e.g.,
                 "yuv420p", "yuv444p"). If not specified, uses codec's default format.
-            crf (int, optional): Constant Rate Factor for encoding quality. Lower values
+            crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
                 mean better quality. Valid range depends on the encoder (commonly 0-51).
                 Defaults to None (which will use encoder's default).
             preset (str or int, optional): Encoder option that controls the tradeoff between
diff --git a/test/test_encoders.py b/test/test_encoders.py
@@ -610,6 +610,37 @@ def test_bad_input_parameterized(self, tmp_path, method):
             )
             getattr(encoder, method)(**valid_params)
 
+        with pytest.raises(RuntimeError, match=r"crf=-10 is out of valid range"):
+            encoder = VideoEncoder(
+                frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),
+                frame_rate=30,
+            )
+            getattr(encoder, method)(**valid_params, crf=-10)
+
+        with pytest.raises(
+            RuntimeError,
+            match=r"avcodec_open2 failed: Invalid argument",
+        ):
+            encoder.to_tensor(format="mp4", preset="fake_preset")
+
+    @pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"])
+    @pytest.mark.parametrize("crf", [23, 23.5, -0.9])  # int, fractional, negative
+    def test_crf_valid_values(self, method, crf, tmp_path):
+        if method == "to_file":
+            valid_params = {"dest": str(tmp_path / "test.mp4")}
+        elif method == "to_tensor":
+            valid_params = {"format": "mp4"}
+        elif method == "to_file_like":
+            valid_params = dict(file_like=io.BytesIO(), format="mp4")
+        else:
+            raise ValueError(f"Unknown method: {method}")
+
+        encoder = VideoEncoder(
+            frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),
+            frame_rate=30,
+        )
+        getattr(encoder, method)(**valid_params, crf=crf)  # must not raise
+
     def test_bad_input(self):
         encoder = VideoEncoder(
             frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),
@@ -634,12 +665,6 @@ def test_bad_input(self):
         ):
             encoder.to_tensor(format="bad_format")
 
-        with pytest.raises(
-            RuntimeError,
-            match=r"avcodec_open2 failed: Invalid argument",
-        ):
-            encoder.to_tensor(format="mp4", preset="fake_preset")
-
     @pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
     def test_pixel_format_errors(self, method, tmp_path):
         frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8)