From 33ae29577e194713b301993a9c910f421cc78245 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Sun, 5 Oct 2025 11:47:48 +0100
Subject: [PATCH 1/2] Clarify stuff

---
 .../_core/BetaCudaDeviceInterface.cpp | 20 ++++++++++++++++++++
 test/test_decoders.py                 | 20 ++++++++-----------
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
index 078655462..eb1850dd9 100644
--- a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
+++ b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
@@ -108,10 +108,30 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
       " vs supported:",
       caps.nMaxMBCount);
 
+  // Below we'll set the decoderParams.OutputFormat to NV12, so we need to make
+  // sure it's actually supported.
+  TORCH_CHECK(
+      caps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12),
+      "NV12 output format is not supported for this configuration. ",
+      "Codec: ",
+      static_cast<int>(videoFormat->codec),
+      ", chroma format: ",
+      static_cast<int>(videoFormat->chroma_format),
+      ", bit depth: ",
+      videoFormat->bit_depth_luma_minus8 + 8);
+
   // Decoder creation parameters, most are taken from DALI
   CUVIDDECODECREATEINFO decoderParams = {};
   decoderParams.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
   decoderParams.ChromaFormat = videoFormat->chroma_format;
+  // We explicitly request the NV12 format, which means 10-bit videos will be
+  // automatically converted to 8 bits by NVDEC itself. That is, the raw frames
+  // we get back from cuvidMapVideoFrame will already be in 8-bit format, so we
+  // won't need to do the conversion ourselves, which is a lot easier.
+  // In the default interface, we have to do the 10 -> 8-bit conversion
+  // ourselves later, in convertAVFrameToFrameOutput(), because FFmpeg
+  // explicitly requests 10- or 16-bit output formats for >8-bit videos:
+  // https://github.com/FFmpeg/FFmpeg/blob/e05f8acabff468c1382277c1f31fa8e9d90c3202/libavcodec/nvdec.c#L376-L403
   decoderParams.OutputFormat = cudaVideoSurfaceFormat_NV12;
   decoderParams.ulCreationFlags = cudaVideoCreate_Default;
   decoderParams.CodecType = videoFormat->codec;
diff --git a/test/test_decoders.py b/test/test_decoders.py
index e5139d089..ca73b99b8 100644
--- a/test/test_decoders.py
+++ b/test/test_decoders.py
@@ -1288,18 +1288,15 @@ def test_10bit_videos(self, device, asset):
         # This just validates that we can decode 10-bit videos.
         # TODO validate against the ref that the decoded frames are correct
 
-        if device == "cuda:0:beta":
-            # This fails on our BETA interface on asset 0 (only!) with:
+        if device == "cuda:0:beta" and asset is H264_10BITS:
+            # This fails on the BETA interface with:
             #
             # RuntimeError: Codec configuration not supported on this GPU.
             # Codec: 4, chroma format: 1, bit depth: 10
             #
-            # I don't remember but I suspect asset 0 is actually the one that
-            # fallsback to the CPU path on the default CUDA interface (that
-            # would make sense)
-            # We should investigate if and how we could make that fallback
-            # happen for the BETA interface.
-            pytest.skip("TODONVDEC P2 - investigate and unskip")
+            # It works on the default interface because FFmpeg falls back to
+            # the CPU, while the BETA interface doesn't.
+            pytest.skip("Asset not supported by NVDEC")
 
         decoder = VideoDecoder(asset.path, device=device)
         decoder.get_frame_at(10)
@@ -1674,12 +1671,11 @@ def test_beta_cuda_interface_backwards(self, asset, seek_mode):
 
     @needs_cuda
     def test_beta_cuda_interface_small_h265(self):
-        # TODONVDEC P2 investigate why/how the default interface can decode this
-        # video.
+        # Test to illustrate the current difference in behavior between the BETA
+        # and the default interface: this video isn't supported by NVDEC, but on
+        # the default interface FFmpeg falls back to the CPU, while we don't.
 
-        # This is fine on the default interface - why?
         VideoDecoder(H265_VIDEO.path, device="cuda").get_frame_at(0)
-        # But it fails on the beta interface due to input validation checks, which we took from DALI!
        with pytest.raises(
             RuntimeError,
             match="Video is too small in at least one dimension. Provided: 128x128 vs supported:144x144",
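
Note on PATCH 1/2: the new comment contrasts two decode paths. The BETA
interface always asks NVDEC for 8-bit NV12 surfaces, so NVDEC itself
downconverts 10-bit content; the default interface lets FFmpeg pick a
high-bit-depth surface and converts to 8 bits afterwards. Below is a
simplified, self-contained sketch of that choice; the SurfaceFormat enum and
the pickSurfaceFormat() helper are made up for illustration and are not the
real cuviddec.h or FFmpeg code.

#include <cstdio>

// Stand-in surface formats (hypothetical, not the cuviddec.h enum values).
enum SurfaceFormat { NV12, P016 };

// Simplified model of the two paths described in the patch comment: the BETA
// interface always requests NV12 so NVDEC downconverts 10-bit -> 8-bit itself;
// the default (FFmpeg) path requests a high-bit-depth surface for >8-bit
// videos and converts to 8 bits later in convertAVFrameToFrameOutput().
SurfaceFormat pickSurfaceFormat(int bitDepthLumaMinus8, bool betaInterface) {
  if (betaInterface) {
    return NV12;
  }
  return bitDepthLumaMinus8 > 0 ? P016 : NV12;
}

int main() {
  // A 10-bit video has bit_depth_luma_minus8 == 2.
  std::printf("10-bit, BETA:    %s\n",
              pickSurfaceFormat(2, true) == NV12 ? "NV12" : "P016");
  std::printf("10-bit, default: %s\n",
              pickSurfaceFormat(2, false) == NV12 ? "NV12" : "P016");
  return 0;
}
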
+ pytest.skip("Asset not supported by NVDEC") decoder = VideoDecoder(asset.path, device=device) decoder.get_frame_at(10) @@ -1674,12 +1671,11 @@ def test_beta_cuda_interface_backwards(self, asset, seek_mode): @needs_cuda def test_beta_cuda_interface_small_h265(self): - # TODONVDEC P2 investigate why/how the default interface can decode this - # video. + # Test to illustrate current difference in behavior between the BETA and + # the default interface: this video isn't supported by NVDEC, but in the + # default interface, FFMPEG fallsback to the CPU while we don't. - # This is fine on the default interface - why? VideoDecoder(H265_VIDEO.path, device="cuda").get_frame_at(0) - # But it fails on the beta interface due to input validation checks, which we took from DALI! with pytest.raises( RuntimeError, match="Video is too small in at least one dimension. Provided: 128x128 vs supported:144x144", From 001bbc8062d09f40e6c09362e39500bcaa7fdf86 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sun, 5 Oct 2025 12:04:19 +0100 Subject: [PATCH 2/2] Clearer bitwise logic --- src/torchcodec/_core/BetaCudaDeviceInterface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp index eb1850dd9..333b92bde 100644 --- a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp +++ b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp @@ -111,7 +111,7 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) { // Below we'll set the decoderParams.OutputFormat to NV12, so we need to make // sure it's actually supported. TORCH_CHECK( - caps.nOutputFormatMask & (1 << cudaVideoSurfaceFormat_NV12), + (caps.nOutputFormatMask >> cudaVideoSurfaceFormat_NV12) & 1, "NV12 output format is not supported for this configuration. ", "Codec: ", static_cast(videoFormat->codec),