Clarify comments, rename CUDA encoding functions, and assert pix_fmt in NVENC test

Dan-Flores · Dan-Flores · commit 79e5c874d311 · 2025-12-05T18:35:06.000Z
diff --git a/src/torchcodec/_core/CudaDeviceInterface.cpp b/src/torchcodec/_core/CudaDeviceInterface.cpp
@@ -378,7 +378,7 @@ const Npp32f defaultLimitedRangeRgbToNv12[3][4] = {
     {0.439f, -0.368f, -0.071f, 128.0f}};
 } // namespace
 
-std::optional<UniqueAVFrame> CudaDeviceInterface::convertTensorToAVFrame(
+UniqueAVFrame CudaDeviceInterface::convertCUDATensorToAVFrameForEncoding(
     const torch::Tensor& tensor,
     int frameIndex,
     AVCodecContext* codecContext) {
@@ -440,7 +440,7 @@ std::optional<UniqueAVFrame> CudaDeviceInterface::convertTensorToAVFrame(
 // Allocates and initializes AVHWFramesContext, and sets pixel format fields
 // to enable encoding with CUDA device. The hw_frames_ctx field is needed by
 // FFmpeg to allocate frames on GPU's memory.
-void CudaDeviceInterface::setupHardwareFrameContext(
+void CudaDeviceInterface::setupHardwareFrameContextForEncoding(
     AVCodecContext* codecContext) {
   TORCH_CHECK(codecContext != nullptr, "codecContext is null");
   TORCH_CHECK(
diff --git a/src/torchcodec/_core/CudaDeviceInterface.h b/src/torchcodec/_core/CudaDeviceInterface.h
@@ -41,12 +41,13 @@ class CudaDeviceInterface : public DeviceInterface {
 
   std::string getDetails() override;
 
-  std::optional<UniqueAVFrame> convertTensorToAVFrame(
+  UniqueAVFrame convertCUDATensorToAVFrameForEncoding(
       const torch::Tensor& tensor,
       int frameIndex,
       AVCodecContext* codecContext) override;
 
-  void setupHardwareFrameContext(AVCodecContext* codecContext) override;
+  void setupHardwareFrameContextForEncoding(
+      AVCodecContext* codecContext) override;
 
  private:
   // Our CUDA decoding code assumes NV12 format. In order to handle other
diff --git a/src/torchcodec/_core/DeviceInterface.h b/src/torchcodec/_core/DeviceInterface.h
@@ -139,16 +139,22 @@ class DeviceInterface {
   }
 
   // Function used for video encoding, only implemented in CudaDeviceInterface.
-  virtual std::optional<UniqueAVFrame> convertTensorToAVFrame(
+  // It is here to isolate CUDA dependencies from CPU builds.
+  // TODO-VideoEncoder: Reconsider whether video encoding functions belong in
+  // the device interface.
+  virtual UniqueAVFrame convertCUDATensorToAVFrameForEncoding(
       [[maybe_unused]] const torch::Tensor& tensor,
       [[maybe_unused]] int frameIndex,
       [[maybe_unused]] AVCodecContext* codecContext) {
-    return std::nullopt;
+    TORCH_CHECK(false);
   }
 
   // Function used for video encoding, only implemented in CudaDeviceInterface.
-  virtual void setupHardwareFrameContext(
-      [[maybe_unused]] AVCodecContext* codecContext) {}
+  // It is here to isolate CUDA dependencies from CPU builds.
+  virtual void setupHardwareFrameContextForEncoding(
+      [[maybe_unused]] AVCodecContext* codecContext) {
+    TORCH_CHECK(false);
+  }
 
  protected:
   torch::Device device_;
diff --git a/src/torchcodec/_core/Encoder.cpp b/src/torchcodec/_core/Encoder.cpp
@@ -830,7 +830,8 @@ void VideoEncoder::initializeEncoder(
   // When frames are on a CUDA device, deviceInterface_ will be defined.
   if (frames_.device().is_cuda() && deviceInterface_) {
     deviceInterface_->registerHardwareDeviceWithCodec(avCodecContext_.get());
-    deviceInterface_->setupHardwareFrameContext(avCodecContext_.get());
+    deviceInterface_->setupHardwareFrameContextForEncoding(
+        avCodecContext_.get());
   }
 
   int status = avcodec_open2(avCodecContext_.get(), avCodec, &avCodecOptions);
@@ -875,15 +876,15 @@ void VideoEncoder::encode() {
     torch::Tensor currFrame = frames_[i];
     UniqueAVFrame avFrame;
     if (frames_.device().is_cuda() && deviceInterface_) {
-      auto cudaFrame = deviceInterface_->convertTensorToAVFrame(
+      auto cudaFrame = deviceInterface_->convertCUDATensorToAVFrameForEncoding(
           currFrame, i, avCodecContext_.get());
       TORCH_CHECK(
-          cudaFrame.has_value(),
-          "convertTensorToAVFrame failed for frame ",
+          cudaFrame != nullptr,
+          "convertCUDATensorToAVFrameForEncoding failed for frame ",
           i,
-          "on device: ",
+          " on device: ",
           frames_.device());
-      avFrame = std::move(*cudaFrame);
+      avFrame = std::move(cudaFrame);
     } else {
       avFrame = convertTensorToAVFrame(currFrame, i);
     }
diff --git a/src/torchcodec/_core/StreamOptions.h b/src/torchcodec/_core/StreamOptions.h
@@ -41,8 +41,8 @@ struct VideoStreamOptions {
       ColorConversionLibrary::FILTERGRAPH;
 
   // By default we use CPU for decoding for both C++ and python users.
-  // Note: For video encoding, device is determined by the location of the input
-  // frame tensor.
+  // Note: This is not used for video encoding, because the device is
+  // determined by the device of the input frame tensor.
   torch::Device device = torch::kCPU;
   // Device variant (e.g., "ffmpeg", "beta", etc.)
   std::string_view deviceVariant = "ffmpeg";
diff --git a/test/test_encoders.py b/test/test_encoders.py
@@ -17,7 +17,6 @@
     assert_tensor_close_on_at_least,
     get_ffmpeg_major_version,
     get_ffmpeg_minor_version,
-    in_fbcode,
     IS_WINDOWS,
     NASA_AUDIO_MP3,
     needs_ffmpeg_cli,
@@ -1304,14 +1303,16 @@ def test_extra_options_utilized(self, tmp_path, profile, colorspace, color_range
         assert metadata["color_space"] == colorspace
         assert metadata["color_range"] == color_range
 
+    @needs_ffmpeg_cli
     @pytest.mark.needs_cuda
-    @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available")
+    # TODO-VideoEncoder: Auto-select codec for GPU encoding
     @pytest.mark.parametrize(
         "format_codec",
         [
             ("mov", "h264_nvenc"),
             ("mp4", "hevc_nvenc"),
             ("avi", "h264_nvenc"),
+            # TODO-VideoEncoder: add in_CI mark, similar to in_fbcode
             # ("mkv", "av1_nvenc"), # av1_nvenc is not supported on CI
         ],
     )
@@ -1354,16 +1355,7 @@ def test_nvenc_against_ffmpeg_cli(self, tmp_path, format_codec, method):
             ffmpeg_cmd.extend(["-rc", "constqp"])  # Set rate control mode for AV1
         ffmpeg_cmd.extend(["-qp", str(qp)])  # Use lossless qp for other codecs
         ffmpeg_cmd.extend([ffmpeg_encoded_path])
-
-        # TODO-VideoEncoder: Ensure CI does not skip this test, as we know NVENC is available.
-        try:
-            subprocess.run(ffmpeg_cmd, check=True, capture_output=True)
-        except subprocess.CalledProcessError as e:
-            if b"No NVENC capable devices found" in e.stderr:
-                pytest.skip("NVENC not available on this system")
-            else:
-                raise
-
+        subprocess.run(ffmpeg_cmd, check=True, capture_output=True)
         encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate)
 
         encoder_extra_options = {"qp": qp}
@@ -1404,4 +1396,11 @@ def test_nvenc_against_ffmpeg_cli(self, tmp_path, format_codec, method):
         assert ffmpeg_frames.shape[0] == encoder_frames.shape[0]
         for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames):
             assert psnr(ff_frame, enc_frame) > 25
-            assert_tensor_close_on_at_least(ff_frame, enc_frame, percentage=95, atol=2)
+            assert_tensor_close_on_at_least(ff_frame, enc_frame, percentage=96, atol=2)
+
+        if method == "to_file":
+            ffmpeg_metadata = self._get_video_metadata(ffmpeg_encoded_path, ["pix_fmt"])
+            encoder_metadata = self._get_video_metadata(encoder_output, ["pix_fmt"])
+            # pix_fmt nv12 is stored as yuv420p in metadata
+            assert encoder_metadata["pix_fmt"] == "yuv420p"
+            assert ffmpeg_metadata["pix_fmt"] == "yuv420p"