Skip to content

Commit 5becf10

Browse files
authored
Support CPU fallback for videos that don't get decoded by nvdec (#792)
1 parent 0794021 commit 5becf10

File tree

2 files changed

+61
-25
lines changed

2 files changed

+61
-25
lines changed

src/torchcodec/_core/CudaDeviceInterface.cpp

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -196,23 +196,29 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput(
196196
UniqueAVFrame& avFrame,
197197
FrameOutput& frameOutput,
198198
std::optional<torch::Tensor> preAllocatedOutputTensor) {
199-
// We check that avFrame->format == AV_PIX_FMT_CUDA. This only ensures the
200-
// AVFrame is on GPU memory. It can be on CPU memory if the video isn't
201-
// supported by NVDEC for whatever reason: NVDEC falls back to CPU decoding in
202-
// this case, and our check fails.
203-
// TODO: we could send the frame back into the CPU path, and rely on
204-
// swscale/filtergraph to run the color conversion to properly output the
205-
// frame.
206-
TORCH_CHECK(
207-
avFrame->format == AV_PIX_FMT_CUDA,
208-
"Expected format to be AV_PIX_FMT_CUDA, got ",
209-
(av_get_pix_fmt_name((AVPixelFormat)avFrame->format)
210-
? av_get_pix_fmt_name((AVPixelFormat)avFrame->format)
211-
: "unknown"),
212-
". When that happens, it is probably because the video is not supported by NVDEC. "
213-
"Try using the CPU device instead. "
214-
"If the video is 10bit, we are tracking 10bit support in "
215-
"https://github.com/pytorch/torchcodec/issues/776");
199+
if (avFrame->format != AV_PIX_FMT_CUDA) {
200+
// The frame's format is AV_PIX_FMT_CUDA if and only if its content is on
201+
// the GPU. In this branch, the frame is on the CPU: this is what NVDEC
202+
// gives us if it wasn't able to decode a frame, for whatever reason.
203+
// Typically that happens if the video's encoding isn't supported by NVDEC.
204+
// Below, we choose to convert the frame's color-space using the CPU
205+
// codepath, and send it back to the GPU at the very end.
206+
// TODO: A possibly better solution would be to send the frame to the GPU
207+
// first, and do the color conversion there.
208+
auto cpuDevice = torch::Device(torch::kCPU);
209+
auto cpuInterface = createDeviceInterface(cpuDevice);
210+
211+
FrameOutput cpuFrameOutput;
212+
cpuInterface->convertAVFrameToFrameOutput(
213+
videoStreamOptions,
214+
timeBase,
215+
avFrame,
216+
cpuFrameOutput,
217+
preAllocatedOutputTensor);
218+
219+
frameOutput.data = cpuFrameOutput.data.to(device_);
220+
return;
221+
}
216222

217223
// Above we checked that the AVFrame was on GPU, but that's not enough, we
218224
// also need to check that the AVFrame is in AV_PIX_FMT_NV12 format (8 bits),

test/test_decoders.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,22 +1197,52 @@ def test_pts_to_dts_fallback(self, seek_mode):
11971197
torch.testing.assert_close(decoder[0], decoder[10])
11981198

11991199
@needs_cuda
1200-
@pytest.mark.parametrize("asset", (H264_10BITS, H265_10BITS))
1201-
def test_10bit_videos_cuda(self, asset):
1200+
def test_10bit_videos_cuda(self):
12021201
# Assert that we raise a proper error on different kinds of 10bit videos.
12031202

12041203
# TODO we should investigate how to support 10bit videos on GPU.
12051204
# See https://github.com/pytorch/torchcodec/issues/776
12061205

1207-
decoder = VideoDecoder(asset.path, device="cuda")
1206+
asset = H265_10BITS
12081207

1209-
if asset is H265_10BITS:
1210-
match = "The AVFrame is p010le, but we expected AV_PIX_FMT_NV12."
1211-
else:
1212-
match = "Expected format to be AV_PIX_FMT_CUDA, got yuv420p10le."
1213-
with pytest.raises(RuntimeError, match=match):
1208+
decoder = VideoDecoder(asset.path, device="cuda")
1209+
with pytest.raises(
1210+
RuntimeError,
1211+
match="The AVFrame is p010le, but we expected AV_PIX_FMT_NV12.",
1212+
):
12141213
decoder.get_frame_at(0)
12151214

1215+
@needs_cuda
1216+
def test_10bit_gpu_fallsback_to_cpu(self):
1217+
# Test for 10-bit videos that aren't supported by NVDEC: we decode and
1218+
# do the color conversion on the CPU.
1219+
# Here we just assert that the GPU results are the same as the CPU
1220+
# results.
1221+
# TODO see other TODO below in test_10bit_videos_cpu: we should validate
1222+
# the frames against a reference.
1223+
1224+
# We know from previous tests that the H264_10BITS video isn't supported
1225+
# by NVDEC, so decoding falls back to the CPU.
1226+
asset = H264_10BITS
1227+
1228+
decoder_gpu = VideoDecoder(asset.path, device="cuda")
1229+
decoder_cpu = VideoDecoder(asset.path)
1230+
1231+
frame_indices = [0, 10, 20, 5]
1232+
for frame_index in frame_indices:
1233+
frame_gpu = decoder_gpu.get_frame_at(frame_index).data
1234+
assert frame_gpu.device.type == "cuda"
1235+
frame_cpu = decoder_cpu.get_frame_at(frame_index).data
1236+
assert_frames_equal(frame_gpu.cpu(), frame_cpu)
1237+
1238+
# We also check a batch API just to be on the safe side, making sure the
1239+
# pre-allocated tensor is passed down correctly to the CPU
1240+
# implementation.
1241+
frames_gpu = decoder_gpu.get_frames_at(frame_indices).data
1242+
assert frames_gpu.device.type == "cuda"
1243+
frames_cpu = decoder_cpu.get_frames_at(frame_indices).data
1244+
assert_frames_equal(frames_gpu.cpu(), frames_cpu)
1245+
12161246
@pytest.mark.parametrize("asset", (H264_10BITS, H265_10BITS))
12171247
def test_10bit_videos_cpu(self, asset):
12181248
# This just validates that we can decode 10-bit videos on CPU.

0 commit comments

Comments
 (0)