Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 35 additions & 19 deletions src/torchcodec/_core/BetaCudaDeviceInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,11 +150,24 @@ cudaVideoCodec validateCodecSupport(AVCodecID codecId) {
return cudaVideoCodec_HEVC;
case AV_CODEC_ID_AV1:
return cudaVideoCodec_AV1;
// TODONVDEC P0: support more codecs
// case AV_CODEC_ID_MPEG4: return cudaVideoCodec_MPEG4;
// case AV_CODEC_ID_VP8: return cudaVideoCodec_VP8;
// case AV_CODEC_ID_VP9: return cudaVideoCodec_VP9;
// case AV_CODEC_ID_MJPEG: return cudaVideoCodec_JPEG;
case AV_CODEC_ID_VP9:
return cudaVideoCodec_VP9;
case AV_CODEC_ID_VP8:
return cudaVideoCodec_VP8;
case AV_CODEC_ID_MPEG4:
return cudaVideoCodec_MPEG4;
// Formats below are currently not tested, but they should "mostly" work.
// MPEG1 was briefly locally tested and it was ok-ish despite duration being
// off. Since they're far less popular, we keep them disabled by default but
// we can consider enabling them upon user requests.
// case AV_CODEC_ID_MPEG1VIDEO:
// return cudaVideoCodec_MPEG1;
// case AV_CODEC_ID_MPEG2VIDEO:
// return cudaVideoCodec_MPEG2;
// case AV_CODEC_ID_MJPEG:
// return cudaVideoCodec_JPEG;
// case AV_CODEC_ID_VC1:
// return cudaVideoCodec_VC1;
default: {
TORCH_CHECK(false, "Unsupported codec type: ", avcodec_get_name(codecId));
}
Expand Down Expand Up @@ -270,10 +283,17 @@ void BetaCudaDeviceInterface::initializeBSF(
}
break;
}
case AV_CODEC_ID_MPEG4: {
const std::string formatName =
avFormatCtx->iformat->name ? avFormatCtx->iformat->name : "";
if (formatName == "avi") {
filterName = "mpeg4_unpack_bframes";
}
break;
}

default:
// No bitstream filter needed for other codecs
// TODONVDEC P1 MPEG4 will need one!
break;
}

Expand Down Expand Up @@ -512,19 +532,15 @@ UniqueAVFrame BetaCudaDeviceInterface::convertCudaFrameToAVFrame(
avFrame->format = AV_PIX_FMT_CUDA;
avFrame->pts = dispInfo.timestamp;

// TODONVDEC P2: We compute the duration based on average frame rate info:
// either from NVCUVID if it's valid, otherwise from FFmpeg as fallback. But
// both of these are based on average frame rate, so if the video has
// variable frame rate, the durations may be off. We should try to see if we
// can set the duration more accurately. Unfortunately it's not given by
// dispInfo. One option would be to set it based on the pts difference between
// consecutive frames, if the next frame is already available.
int frameRateNum = static_cast<int>(videoFormat_.frame_rate.numerator);
int frameRateDen = static_cast<int>(videoFormat_.frame_rate.denominator);
AVRational frameRate = (frameRateNum > 0 && frameRateDen > 0)
? AVRational{frameRateNum, frameRateDen}
: frameRateAvgFromFFmpeg_;
setDuration(avFrame, computeSafeDuration(frameRate, timeBase_));
// TODONVDEC P2: We compute the duration based on average frame rate info, so
// if the video has variable frame rate, the durations may be off. We
// should try to see if we can set the duration more accurately. Unfortunately
// it's not given by dispInfo. One option would be to set it based on the pts
// difference between consecutive frames, if the next frame is already
// available.
// Note that we used to rely on videoFormat_.frame_rate for this, but that
// proved less accurate than FFmpeg.
setDuration(avFrame, computeSafeDuration(frameRateAvgFromFFmpeg_, timeBase_));

// We need to assign the frame colorspace. This is crucial for proper color
// conversion. NVCUVID stores that in the matrix_coefficients field, but
Expand Down
Binary file added test/resources/testsrc2_mpeg4.avi
Binary file not shown.
Binary file added test/resources/testsrc2_vp8.webm
Binary file not shown.
Binary file added test/resources/testsrc2_vp9.webm
Binary file not shown.
75 changes: 52 additions & 23 deletions test/test_decoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,12 @@
SINE_MONO_S32,
SINE_MONO_S32_44100,
SINE_MONO_S32_8000,
supports_approximate_mode,
TEST_SRC_2_720P,
TEST_SRC_2_720P_H265,
TEST_SRC_2_720P_MPEG4,
TEST_SRC_2_720P_VP8,
TEST_SRC_2_720P_VP9,
unsplit_device_str,
)

Expand Down Expand Up @@ -588,7 +592,7 @@ def test_get_frame_at_av1(self, device):
return

if device == "cuda" and in_fbcode():
pytest.skip("AV1 decoding on CUDA is not supported internally")
pytest.skip("decoding on CUDA is not supported internally")

decoder = VideoDecoder(AV1_VIDEO.path, device=device)
device, _ = unsplit_device_str(device)
Expand Down Expand Up @@ -1432,15 +1436,20 @@ def test_get_frames_at_tensor_indices(self):
decoder.get_frames_played_at(torch.tensor([0, 1], dtype=torch.int))
decoder.get_frames_played_at(torch.tensor([0, 1], dtype=torch.float))

# TODONVDEC P1 unskip equality assertion checks on FFMpeg4. The comparison
# checks are failing on very few pixels, e.g.:
# TODONVDEC P1:
# - unskip equality assertion checks on FFMpeg4. The comparison
# checks are failing on very few pixels, e.g.:
#
# E Mismatched elements: 648586 / 82944000 (0.8%)
# E Greatest absolute difference: 164 at index (20, 2, 27, 96)
# E Greatest relative difference: inf at index (5, 1, 112, 186)
# E Mismatched elements: 648586 / 82944000 (0.8%)
# E Greatest absolute difference: 164 at index (20, 2, 27, 96)
# E Greatest relative difference: inf at index (5, 1, 112, 186)
#
# So we're skipping them to unblock for now, but we should call
# assert_tensor_close_on_at_least or something like that.
# So we're skipping them to unblock for now, but we should call
# assert_tensor_close_on_at_least or something like that.
# - unskip equality assertion checks for MPEG4 asset. The frames are decoded
# fine, it's the color conversion that's different. The frame from the
# BETA interface is assumed to be BT.709 while the one from the default
# interface is BT.601.

@needs_cuda
@pytest.mark.parametrize(
Expand All @@ -1451,15 +1460,18 @@ def test_get_frames_at_tensor_indices(self):
BT709_FULL_RANGE,
TEST_SRC_2_720P_H265,
AV1_VIDEO,
TEST_SRC_2_720P_VP9,
TEST_SRC_2_720P_VP8,
TEST_SRC_2_720P_MPEG4,
),
)
@pytest.mark.parametrize("contiguous_indices", (True, False))
@pytest.mark.parametrize("seek_mode", ("exact", "approximate"))
def test_beta_cuda_interface_get_frame_at(
self, asset, contiguous_indices, seek_mode
):
if asset == AV1_VIDEO and seek_mode == "approximate":
pytest.skip("AV1 asset doesn't work with approximate mode")
if seek_mode == "approximate" and not supports_approximate_mode(asset):
pytest.skip("asset doesn't work with approximate mode")

ref_decoder = VideoDecoder(asset.path, device="cuda", seek_mode=seek_mode)
beta_decoder = VideoDecoder(
Expand All @@ -1476,7 +1488,8 @@ def test_beta_cuda_interface_get_frame_at(
for frame_index in indices:
ref_frame = ref_decoder.get_frame_at(frame_index)
beta_frame = beta_decoder.get_frame_at(frame_index)
if get_ffmpeg_major_version() > 4: # TODONVDEC P1 see above
# TODONVDEC P1 see above
if get_ffmpeg_major_version() > 4 and asset is not TEST_SRC_2_720P_MPEG4:
torch.testing.assert_close(
beta_frame.data, ref_frame.data, rtol=0, atol=0
)
Expand All @@ -1493,15 +1506,18 @@ def test_beta_cuda_interface_get_frame_at(
BT709_FULL_RANGE,
TEST_SRC_2_720P_H265,
AV1_VIDEO,
TEST_SRC_2_720P_VP9,
TEST_SRC_2_720P_VP8,
TEST_SRC_2_720P_MPEG4,
),
)
@pytest.mark.parametrize("contiguous_indices", (True, False))
@pytest.mark.parametrize("seek_mode", ("exact", "approximate"))
def test_beta_cuda_interface_get_frames_at(
self, asset, contiguous_indices, seek_mode
):
if asset == AV1_VIDEO and seek_mode == "approximate":
pytest.skip("AV1 asset doesn't work with approximate mode")
if seek_mode == "approximate" and not supports_approximate_mode(asset):
pytest.skip("asset doesn't work with approximate mode")

ref_decoder = VideoDecoder(asset.path, device="cuda", seek_mode=seek_mode)
beta_decoder = VideoDecoder(
Expand All @@ -1518,7 +1534,8 @@ def test_beta_cuda_interface_get_frames_at(

ref_frames = ref_decoder.get_frames_at(indices)
beta_frames = beta_decoder.get_frames_at(indices)
if get_ffmpeg_major_version() > 4: # TODONVDEC P1 see above
# TODONVDEC P1 see above
if get_ffmpeg_major_version() > 4 and asset is not TEST_SRC_2_720P_MPEG4:
torch.testing.assert_close(
beta_frames.data, ref_frames.data, rtol=0, atol=0
)
Expand All @@ -1536,12 +1553,15 @@ def test_beta_cuda_interface_get_frames_at(
BT709_FULL_RANGE,
TEST_SRC_2_720P_H265,
AV1_VIDEO,
TEST_SRC_2_720P_VP9,
TEST_SRC_2_720P_VP8,
TEST_SRC_2_720P_MPEG4,
),
)
@pytest.mark.parametrize("seek_mode", ("exact", "approximate"))
def test_beta_cuda_interface_get_frame_played_at(self, asset, seek_mode):
if asset == AV1_VIDEO and seek_mode == "approximate":
pytest.skip("AV1 asset doesn't work with approximate mode")
if seek_mode == "approximate" and not supports_approximate_mode(asset):
pytest.skip("asset doesn't work with approximate mode")

ref_decoder = VideoDecoder(asset.path, device="cuda", seek_mode=seek_mode)
beta_decoder = VideoDecoder(
Expand All @@ -1556,7 +1576,8 @@ def test_beta_cuda_interface_get_frame_played_at(self, asset, seek_mode):
for pts in timestamps:
ref_frame = ref_decoder.get_frame_played_at(pts)
beta_frame = beta_decoder.get_frame_played_at(pts)
if get_ffmpeg_major_version() > 4: # TODONVDEC P1 see above
# TODONVDEC P1 see above
if get_ffmpeg_major_version() > 4 and asset is not TEST_SRC_2_720P_MPEG4:
torch.testing.assert_close(
beta_frame.data, ref_frame.data, rtol=0, atol=0
)
Expand All @@ -1573,12 +1594,15 @@ def test_beta_cuda_interface_get_frame_played_at(self, asset, seek_mode):
BT709_FULL_RANGE,
TEST_SRC_2_720P_H265,
AV1_VIDEO,
TEST_SRC_2_720P_VP9,
TEST_SRC_2_720P_VP8,
TEST_SRC_2_720P_MPEG4,
),
)
@pytest.mark.parametrize("seek_mode", ("exact", "approximate"))
def test_beta_cuda_interface_get_frames_played_at(self, asset, seek_mode):
if asset == AV1_VIDEO and seek_mode == "approximate":
pytest.skip("AV1 asset doesn't work with approximate mode")
if seek_mode == "approximate" and not supports_approximate_mode(asset):
pytest.skip("asset doesn't work with approximate mode")

ref_decoder = VideoDecoder(asset.path, device="cuda", seek_mode=seek_mode)
beta_decoder = VideoDecoder(
Expand All @@ -1593,7 +1617,8 @@ def test_beta_cuda_interface_get_frames_played_at(self, asset, seek_mode):

ref_frames = ref_decoder.get_frames_played_at(timestamps)
beta_frames = beta_decoder.get_frames_played_at(timestamps)
if get_ffmpeg_major_version() > 4: # TODONVDEC P1 see above
# TODONVDEC P1 see above
if get_ffmpeg_major_version() > 4 and asset is not TEST_SRC_2_720P_MPEG4:
torch.testing.assert_close(
beta_frames.data, ref_frames.data, rtol=0, atol=0
)
Expand All @@ -1611,12 +1636,15 @@ def test_beta_cuda_interface_get_frames_played_at(self, asset, seek_mode):
BT709_FULL_RANGE,
TEST_SRC_2_720P_H265,
AV1_VIDEO,
TEST_SRC_2_720P_VP9,
TEST_SRC_2_720P_VP8,
TEST_SRC_2_720P_MPEG4,
),
)
@pytest.mark.parametrize("seek_mode", ("exact", "approximate"))
def test_beta_cuda_interface_backwards(self, asset, seek_mode):
if asset == AV1_VIDEO and seek_mode == "approximate":
pytest.skip("AV1 asset doesn't work with approximate mode")
if seek_mode == "approximate" and not supports_approximate_mode(asset):
pytest.skip("asset doesn't work with approximate mode")

ref_decoder = VideoDecoder(asset.path, device="cuda", seek_mode=seek_mode)
beta_decoder = VideoDecoder(
Expand All @@ -1635,7 +1663,8 @@ def test_beta_cuda_interface_backwards(self, asset, seek_mode):

ref_frame = ref_decoder.get_frame_at(frame_index)
beta_frame = beta_decoder.get_frame_at(frame_index)
if get_ffmpeg_major_version() > 4: # TODONVDEC P1 see above
# TODONVDEC P1 see above
if get_ffmpeg_major_version() > 4 and asset is not TEST_SRC_2_720P_MPEG4:
torch.testing.assert_close(
beta_frame.data, ref_frame.data, rtol=0, atol=0
)
Expand Down
37 changes: 37 additions & 0 deletions test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,3 +717,40 @@ def sample_format(self) -> str:
},
frames={0: {}}, # Not needed for now
)

# Test asset: `testsrc2` pattern, 1280x720 at 30 fps, 1 second long, encoded
# with libvpx-vp9 (VP9) into a WebM container. Generated with the command
# below; note the command's output name differs from `filename`, so the file
# was presumably renamed afterwards -- TODO confirm.
# ffmpeg -f lavfi -i testsrc2=size=1280x720:rate=30:duration=1 -c:v libvpx-vp9 -b:v 1M output_vp9.webm
TEST_SRC_2_720P_VP9 = TestVideo(
    filename="testsrc2_vp9.webm",
    default_stream_index=0,
    stream_infos={
        # Dimensions match the `size=1280x720` used in the ffmpeg command.
        0: TestVideoStreamInfo(width=1280, height=720, num_color_channels=3),
    },
    frames={0: {}},  # Not needed for now
)

# Test asset: `testsrc2` pattern, 1280x720 at 30 fps, 1 second long, encoded
# with libvpx (VP8) into a WebM container. Generated with the command below;
# note the command's output name differs from `filename`, so the file was
# presumably renamed afterwards -- TODO confirm.
# ffmpeg -f lavfi -i testsrc2=size=1280x720:rate=30:duration=1 -c:v libvpx -b:v 1M output_vp8.webm
TEST_SRC_2_720P_VP8 = TestVideo(
    filename="testsrc2_vp8.webm",
    default_stream_index=0,
    stream_infos={
        # Dimensions match the `size=1280x720` used in the ffmpeg command.
        0: TestVideoStreamInfo(width=1280, height=720, num_color_channels=3),
    },
    frames={0: {}},  # Not needed for now
)

# Test asset: `testsrc2` pattern, 1280x720 at 30 fps, 1 second long, encoded
# with the mpeg4 encoder into an AVI container. Generated with the command
# below; note the command's output name differs from `filename`, so the file
# was presumably renamed afterwards -- TODO confirm.
# ffmpeg -f lavfi -i testsrc2=size=1280x720:rate=30:duration=1 -c:v mpeg4 -q:v 5 output_mpeg4.avi
TEST_SRC_2_720P_MPEG4 = TestVideo(
    filename="testsrc2_mpeg4.avi",
    default_stream_index=0,
    stream_infos={
        # Dimensions match the `size=1280x720` used in the ffmpeg command.
        0: TestVideoStreamInfo(width=1280, height=720, num_color_channels=3),
    },
    frames={0: {}},  # Not needed for now
)


def supports_approximate_mode(asset: TestVideo) -> bool:
    """Tell whether ``asset`` can be used with ``seek_mode="approximate"``.

    Returns ``False`` for the assets listed below and ``True`` for every
    other asset.
    """
    # TODONVDEC P2: open an issue about this. That's actually not related to
    # NVDEC at all, those don't support approximate mode because they don't set
    # a duration. CPU decoder fails too!
    assets_without_approximate_mode = (
        AV1_VIDEO,
        TEST_SRC_2_720P_VP9,
        TEST_SRC_2_720P_VP8,
    )
    if asset in assets_without_approximate_mode:
        return False
    return True
Loading