Skip to content

Commit 490c13b

Browse files
committed
Add MPEG4 support
1 parent 8ce445b commit 490c13b

File tree

5 files changed

+66
-27
lines changed

5 files changed

+66
-27
lines changed

src/torchcodec/_core/BetaCudaDeviceInterface.cpp

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -154,8 +154,9 @@ cudaVideoCodec validateCodecSupport(AVCodecID codecId) {
154154
return cudaVideoCodec_VP9;
155155
case AV_CODEC_ID_VP8:
156156
return cudaVideoCodec_VP8;
157+
case AV_CODEC_ID_MPEG4:
158+
return cudaVideoCodec_MPEG4;
157159
// TODONVDEC P0: support more codecs
158-
// case AV_CODEC_ID_MPEG4: return cudaVideoCodec_MPEG4;
159160
// case AV_CODEC_ID_MJPEG: return cudaVideoCodec_JPEG;
160161
default: {
161162
TORCH_CHECK(false, "Unsupported codec type: ", avcodec_get_name(codecId));
@@ -272,6 +273,14 @@ void BetaCudaDeviceInterface::initializeBSF(
272273
}
273274
break;
274275
}
276+
case AV_CODEC_ID_MPEG4: {
277+
const std::string formatName =
278+
avFormatCtx->iformat->name ? avFormatCtx->iformat->name : "";
279+
if (formatName == "avi") {
280+
filterName = "mpeg4_unpack_bframes";
281+
}
282+
break;
283+
}
275284

276285
default:
277286
// No bitstream filter needed for other codecs
@@ -514,19 +523,15 @@ UniqueAVFrame BetaCudaDeviceInterface::convertCudaFrameToAVFrame(
514523
avFrame->format = AV_PIX_FMT_CUDA;
515524
avFrame->pts = dispInfo.timestamp;
516525

517-
// TODONVDEC P2: We compute the duration based on average frame rate info:
518-
// either from NVCUVID if it's valid, otherwise from FFmpeg as fallback. But
519-
// both of these are based on average frame rate, so if the video has
520-
// variable frame rate, the durations may be off. We should try to see if we
521-
// can set the duration more accurately. Unfortunately it's not given by
522-
// dispInfo. One option would be to set it based on the pts difference between
523-
// consecutive frames, if the next frame is already available.
524-
int frameRateNum = static_cast<int>(videoFormat_.frame_rate.numerator);
525-
int frameRateDen = static_cast<int>(videoFormat_.frame_rate.denominator);
526-
AVRational frameRate = (frameRateNum > 0 && frameRateDen > 0)
527-
? AVRational{frameRateNum, frameRateDen}
528-
: frameRateAvgFromFFmpeg_;
529-
setDuration(avFrame, computeSafeDuration(frameRate, timeBase_));
526+
// TODONVDEC P2: We compute the duration based on average frame rate info, so
527+
// if the video has variable frame rate, the durations may be off. We
528+
// should try to see if we can set the duration more accurately. Unfortunately
529+
// it's not given by dispInfo. One option would be to set it based on the pts
530+
// difference between consecutive frames, if the next frame is already
531+
// available.
532+
// Note that we used to rely on videoFormat_.frame_rate for this, but that
533+
// proved less accurate than FFmpeg.
534+
setDuration(avFrame, computeSafeDuration(frameRateAvgFromFFmpeg_, timeBase_));
530535

531536
// We need to assign the frame colorspace. This is crucial for proper color
532537
// conversion. NVCUVID stores that in the matrix_coefficients field, but

src/torchcodec/_core/CudaDeviceInterface.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,7 @@ void CudaDeviceInterface::convertAVFrameToFrameOutput(
472472

473473
// For background, see
474474
// Note [YUV -> RGB Color Conversion, color space and color range]
475+
// if (avFrame->colorspace == AVColorSpace::AVCOL_SPC_BT709) {
475476
if (avFrame->colorspace == AVColorSpace::AVCOL_SPC_BT709) {
476477
if (avFrame->color_range == AVColorRange::AVCOL_RANGE_JPEG) {
477478
// NPP provides a pre-defined color conversion function for BT.709 full

test/resources/testsrc2_mpeg4.avi

777 KB
Binary file not shown.

test/test_decoders.py

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
SINE_MONO_S32_8000,
4646
TEST_SRC_2_720P,
4747
TEST_SRC_2_720P_H265,
48+
TEST_SRC_2_720P_MPEG4,
4849
TEST_SRC_2_720P_VP8,
4950
TEST_SRC_2_720P_VP9,
5051
unsplit_device_str,
@@ -1434,15 +1435,20 @@ def test_get_frames_at_tensor_indices(self):
14341435
decoder.get_frames_played_at(torch.tensor([0, 1], dtype=torch.int))
14351436
decoder.get_frames_played_at(torch.tensor([0, 1], dtype=torch.float))
14361437

1437-
# TODONVDEC P1 unskip equality assertion checks on FFMpeg4. The comparison
1438-
# checks are failing on very few pixels, e.g.:
1438+
# TODONVDEC P1:
1439+
# - unskip equality assertion checks on FFmpeg 4. The comparison
1440+
# checks are failing on very few pixels, e.g.:
14391441
#
1440-
# E Mismatched elements: 648586 / 82944000 (0.8%)
1441-
# E Greatest absolute difference: 164 at index (20, 2, 27, 96)
1442-
# E Greatest relative difference: inf at index (5, 1, 112, 186)
1442+
# E Mismatched elements: 648586 / 82944000 (0.8%)
1443+
# E Greatest absolute difference: 164 at index (20, 2, 27, 96)
1444+
# E Greatest relative difference: inf at index (5, 1, 112, 186)
14431445
#
1444-
# So we're skipping them to unblock for now, but we should call
1445-
# assert_tensor_close_on_at_least or something like that.
1446+
# So we're skipping them to unblock for now, but we should call
1447+
# assert_tensor_close_on_at_least or something like that.
1448+
# - unskip equality assertion checks for MPEG4 asset. The frames are decoded
1449+
# fine, it's the color conversion that's different. The frame from the
1450+
# BETA interface is assumed to be 709 while the one from the default
1451+
# interface is 601.
14461452

14471453
@needs_cuda
14481454
@pytest.mark.parametrize(
@@ -1455,6 +1461,7 @@ def test_get_frames_at_tensor_indices(self):
14551461
AV1_VIDEO,
14561462
TEST_SRC_2_720P_VP9,
14571463
TEST_SRC_2_720P_VP8,
1464+
TEST_SRC_2_720P_MPEG4,
14581465
),
14591466
)
14601467
@pytest.mark.parametrize("contiguous_indices", (True, False))
@@ -1483,7 +1490,15 @@ def test_beta_cuda_interface_get_frame_at(
14831490
for frame_index in indices:
14841491
ref_frame = ref_decoder.get_frame_at(frame_index)
14851492
beta_frame = beta_decoder.get_frame_at(frame_index)
1486-
if get_ffmpeg_major_version() > 4: # TODONVDEC P1 see above
1493+
if asset == TEST_SRC_2_720P_MPEG4:
1494+
from torchvision.io import write_png
1495+
from torchvision.utils import make_grid
1496+
1497+
img = make_grid([beta_frame.data, ref_frame.data], nrow=2)
1498+
write_png(img.cpu(), f"/tmp/frame_{frame_index:04d}.png")
1499+
1500+
# TODONVDEC P1 see above
1501+
if get_ffmpeg_major_version() > 4 and asset is not TEST_SRC_2_720P_MPEG4:
14871502
torch.testing.assert_close(
14881503
beta_frame.data, ref_frame.data, rtol=0, atol=0
14891504
)
@@ -1502,6 +1517,7 @@ def test_beta_cuda_interface_get_frame_at(
15021517
AV1_VIDEO,
15031518
TEST_SRC_2_720P_VP9,
15041519
TEST_SRC_2_720P_VP8,
1520+
TEST_SRC_2_720P_MPEG4,
15051521
),
15061522
)
15071523
@pytest.mark.parametrize("contiguous_indices", (True, False))
@@ -1530,7 +1546,8 @@ def test_beta_cuda_interface_get_frames_at(
15301546

15311547
ref_frames = ref_decoder.get_frames_at(indices)
15321548
beta_frames = beta_decoder.get_frames_at(indices)
1533-
if get_ffmpeg_major_version() > 4: # TODONVDEC P1 see above
1549+
# TODONVDEC P1 see above
1550+
if get_ffmpeg_major_version() > 4 and asset is not TEST_SRC_2_720P_MPEG4:
15341551
torch.testing.assert_close(
15351552
beta_frames.data, ref_frames.data, rtol=0, atol=0
15361553
)
@@ -1550,6 +1567,7 @@ def test_beta_cuda_interface_get_frames_at(
15501567
AV1_VIDEO,
15511568
TEST_SRC_2_720P_VP9,
15521569
TEST_SRC_2_720P_VP8,
1570+
TEST_SRC_2_720P_MPEG4,
15531571
),
15541572
)
15551573
@pytest.mark.parametrize("seek_mode", ("exact", "approximate"))
@@ -1573,7 +1591,8 @@ def test_beta_cuda_interface_get_frame_played_at(self, asset, seek_mode):
15731591
for pts in timestamps:
15741592
ref_frame = ref_decoder.get_frame_played_at(pts)
15751593
beta_frame = beta_decoder.get_frame_played_at(pts)
1576-
if get_ffmpeg_major_version() > 4: # TODONVDEC P1 see above
1594+
# TODONVDEC P1 see above
1595+
if get_ffmpeg_major_version() > 4 and asset is not TEST_SRC_2_720P_MPEG4:
15771596
torch.testing.assert_close(
15781597
beta_frame.data, ref_frame.data, rtol=0, atol=0
15791598
)
@@ -1589,9 +1608,10 @@ def test_beta_cuda_interface_get_frame_played_at(self, asset, seek_mode):
15891608
TEST_SRC_2_720P,
15901609
BT709_FULL_RANGE,
15911610
TEST_SRC_2_720P_H265,
1611+
AV1_VIDEO,
15921612
TEST_SRC_2_720P_VP9,
15931613
TEST_SRC_2_720P_VP8,
1594-
AV1_VIDEO,
1614+
TEST_SRC_2_720P_MPEG4,
15951615
),
15961616
)
15971617
@pytest.mark.parametrize("seek_mode", ("exact", "approximate"))
@@ -1615,7 +1635,8 @@ def test_beta_cuda_interface_get_frames_played_at(self, asset, seek_mode):
16151635

16161636
ref_frames = ref_decoder.get_frames_played_at(timestamps)
16171637
beta_frames = beta_decoder.get_frames_played_at(timestamps)
1618-
if get_ffmpeg_major_version() > 4: # TODONVDEC P1 see above
1638+
# TODONVDEC P1 see above
1639+
if get_ffmpeg_major_version() > 4 and asset is not TEST_SRC_2_720P_MPEG4:
16191640
torch.testing.assert_close(
16201641
beta_frames.data, ref_frames.data, rtol=0, atol=0
16211642
)
@@ -1635,6 +1656,7 @@ def test_beta_cuda_interface_get_frames_played_at(self, asset, seek_mode):
16351656
AV1_VIDEO,
16361657
TEST_SRC_2_720P_VP9,
16371658
TEST_SRC_2_720P_VP8,
1659+
TEST_SRC_2_720P_MPEG4,
16381660
),
16391661
)
16401662
@pytest.mark.parametrize("seek_mode", ("exact", "approximate"))
@@ -1662,7 +1684,8 @@ def test_beta_cuda_interface_backwards(self, asset, seek_mode):
16621684

16631685
ref_frame = ref_decoder.get_frame_at(frame_index)
16641686
beta_frame = beta_decoder.get_frame_at(frame_index)
1665-
if get_ffmpeg_major_version() > 4: # TODONVDEC P1 see above
1687+
# TODONVDEC P1 see above
1688+
if get_ffmpeg_major_version() > 4 and asset is not TEST_SRC_2_720P_MPEG4:
16661689
torch.testing.assert_close(
16671690
beta_frame.data, ref_frame.data, rtol=0, atol=0
16681691
)

test/utils.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -737,3 +737,13 @@ def sample_format(self) -> str:
737737
},
738738
frames={0: {}}, # Not needed for now
739739
)
740+
741+
# ffmpeg -f lavfi -i testsrc2=size=1280x720:rate=30:duration=1 -c:v mpeg4 -q:v 5 output_mpeg4.avi
742+
TEST_SRC_2_720P_MPEG4 = TestVideo(
743+
filename="testsrc2_mpeg4.avi",
744+
default_stream_index=0,
745+
stream_infos={
746+
0: TestVideoStreamInfo(width=1280, height=720, num_color_channels=3),
747+
},
748+
frames={0: {}}, # Not needed for now
749+
)

0 commit comments

Comments
 (0)