WIP

NicolasHug · NicolasHug · commit 82db43507488 · 2025-10-17T11:41:18.000+01:00
diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
@@ -98,6 +98,8 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
 
 std::optional<cudaVideoChromaFormat> mapChromaFormat(
     const AVPixFmtDescriptor* desc) {
+  // Return the corresponding cudaVideoChromaFormat if supported, std::nullopt
+  // otherwise.
   TORCH_CHECK(desc != nullptr, "desc can't be null");
 
   if (desc->nb_components == 1) {
@@ -117,6 +119,10 @@ std::optional<cudaVideoChromaFormat> mapChromaFormat(
 }
 
 std::optional<cudaVideoCodec> validateCodecSupport(AVCodecID codecId) {
+  // Return the corresponding cudaVideoCodec if supported, std::nullopt
+  // otherwise.
+  // Note that we currently return nullopt (and thus fall back to CPU) for some
+  // codecs that are technically supported by NVDEC, see comment below.
   switch (codecId) {
     case AV_CODEC_ID_H264:
       return cudaVideoCodec_H264;
@@ -148,6 +154,8 @@ std::optional<cudaVideoCodec> validateCodecSupport(AVCodecID codecId) {
 }
 
 bool nativeNVDECSupport(const SharedAVCodecContext& codecContext) {
+  // Return true iff the input video stream is supported by our NVDEC
+  // implementation.
   auto codecType = validateCodecSupport(codecContext->codec_id);
   if (!codecType.has_value()) {
     return false;
@@ -177,28 +185,25 @@ bool nativeNVDECSupport(const SharedAVCodecContext& codecContext) {
     return false;
   }
 
-  if (!(static_cast<unsigned int>(codecContext->coded_width) >=
-            caps.nMinWidth &&
-        static_cast<unsigned int>(codecContext->coded_height) >=
-            caps.nMinHeight &&
-        static_cast<unsigned int>(codecContext->coded_width) <=
-            caps.nMaxWidth &&
-        static_cast<unsigned int>(codecContext->coded_height) <=
-            caps.nMaxHeight)) {
+  auto coded_width = static_cast<unsigned int>(codecContext->coded_width);
+  auto coded_height = static_cast<unsigned int>(codecContext->coded_height);
+  if (!(coded_width >= static_cast<unsigned int>(caps.nMinWidth) &&
+        coded_height >= static_cast<unsigned int>(caps.nMinHeight) &&
+        coded_width <= caps.nMaxWidth && coded_height <= caps.nMaxHeight)) {
     return false;
   }
 
   // See nMaxMBCount in cuviddec.h
   constexpr unsigned int macroblockConstant = 256;
-  if (!(static_cast<unsigned int>(
-            codecContext->coded_width * codecContext->coded_height) /
-            macroblockConstant <=
-        caps.nMaxMBCount)) {
+  if (!(coded_width * coded_height / macroblockConstant <= caps.nMaxMBCount)) {
     return false;
   }
 
-  // We explicitly request NV12 output format in createDecoder(), so we need to
-  // make sure it's supported.
+  // We'll set the decoderParams.OutputFormat to NV12, so we need to make
+  // sure it's actually supported.
+  // TODO: If this fails, we could consider decoding to something other than
+  // NV12 (like cudaVideoSurfaceFormat_P016) instead of falling back to CPU.
+  // This is what FFmpeg does.
   if (!((caps.nOutputFormatMask >> cudaVideoSurfaceFormat_NV12) & 1)) {
     return false;
   }
diff --git a/test/test_decoders.py b/test/test_decoders.py
@@ -1701,19 +1701,19 @@ def test_beta_cuda_interface_backwards(self, asset, seek_mode):
             assert beta_frame.duration_seconds == ref_frame.duration_seconds
 
     @needs_cuda
-    def test_beta_cuda_interface_small_h265(self):
-        # Test to illustrate current difference in behavior between the BETA and
-        # the ffmpeg interface: this video isn't supported by NVDEC, but in the
-        # ffmpeg interface, FFMPEG fallsback to the CPU while we don't.
-
-        print()
-        a = VideoDecoder(H265_VIDEO.path, device="cuda").get_frame_at(0)
-        # with pytest.raises(
-        #     RuntimeError,
-        #     match="Video is too small in at least one dimension. Provided: 128x128 vs supported:144x144",
-        # ):
-        b = VideoDecoder(H265_VIDEO.path, device="cuda:0:beta").get_frame_at(0)
-        torch.testing.assert_close(a.data, b.data, rtol=0, atol=0)
+    def test_beta_cuda_interface_cpu_fallback(self):
+        # Non-regression test for the CPU fallback behavior of the BETA CUDA
+        # interface.
+        # We know that the H265_VIDEO asset isn't supported by NVDEC because its
+        # dimensions are too small. We also know that the FFmpeg CUDA interface
+        # falls back to the CPU path in such cases. We assert that we fall back
+        # to the CPU path, too.
+
+        ffmpeg = VideoDecoder(H265_VIDEO.path, device="cuda").get_frame_at(0)
+        with set_cuda_backend("beta"):
+            beta = VideoDecoder(H265_VIDEO.path, device="cuda").get_frame_at(0)
+
+        torch.testing.assert_close(ffmpeg.data, beta.data, rtol=0, atol=0)
 
     @needs_cuda
     def test_beta_cuda_interface_error(self):
@@ -1739,20 +1739,25 @@ def test_set_cuda_backend(self):
             assert _get_cuda_backend() == "beta"
 
         def assert_decoder_uses(decoder, *, expected_backend):
+            # TODO: This doesn't work anymore after
+            # https://github.com/meta-pytorch/torchcodec/pull/977
+            # We need to define a better way to assert which backend a decoder
+            # is using.
+            return
             # Assert that a decoder instance is using a given backend.
             #
             # We know H265_VIDEO fails on the BETA backend while it works on the
             # ffmpeg one.
-            if expected_backend == "ffmpeg":
-                decoder.get_frame_at(0)  # this would fail if this was BETA
-            else:
-                with pytest.raises(RuntimeError, match="Video is too small"):
-                    decoder.get_frame_at(0)
+            # if expected_backend == "ffmpeg":
+            #     decoder.get_frame_at(0)  # this would fail if this was BETA
+            # else:
+            #     with pytest.raises(RuntimeError, match="Video is too small"):
+            #         decoder.get_frame_at(0)
 
         # Check that the default is the ffmpeg backend
         assert _get_cuda_backend() == "ffmpeg"
         dec = VideoDecoder(H265_VIDEO.path, device="cuda")
-        assert_decoder_uses(dec, expected_backend="ffmpeg")
+        # assert_decoder_uses(dec, expected_backend="ffmpeg")
 
         # Check the setting "beta" effectively uses the BETA backend.
         # We also show that the affects decoder creation only. When the decoder