99import os
1010from functools import partial
1111
12- from .utils import get_ffmpeg_major_version , in_fbcode , IS_WINDOWS
12+ from .utils import (
13+ assert_tensor_close_on_at_least ,
14+ get_ffmpeg_major_version ,
15+ in_fbcode ,
16+ IS_WINDOWS ,
17+ TEST_SRC_2_720P ,
18+ )
1319
1420os .environ ["TORCH_LOGS" ] = "output_code"
1521import json
@@ -1341,7 +1347,7 @@ def decode(self, file_path) -> torch.Tensor:
13411347 return frames
13421348
13431349 @pytest .mark .parametrize ("format" , ("mov" , "mp4" , "avi" , "mkv" , "webm" , "flv" ))
1344- def test_video_encoder_test_round_trip (self , tmp_path , format ):
1350+ def test_video_encoder_round_trip (self , tmp_path , format ):
13451351 ffmpeg_version = get_ffmpeg_major_version ()
13461352 if format == "webm" :
13471353 if ffmpeg_version == 4 :
@@ -1352,7 +1358,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
13521358 pytest .skip (
13531359 "Codec for webm is not available in the FFmpeg6/7 installation on Windows."
13541360 )
1355- asset = NASA_VIDEO
1361+ asset = TEST_SRC_2_720P
13561362 # Test that decode(encode(decode(asset))) == decode(asset)
13571363 source_frames = self .decode (str (asset .path )).data
13581364
@@ -1362,20 +1368,93 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
13621368 frames = source_frames , frame_rate = frame_rate , filename = encoded_path , crf = 0
13631369 )
13641370 round_trip_frames = self .decode (encoded_path ).data
1365-
1366- # In the cases where a lossy pixel format conversion occurs, higher tolerance is needed.
1367- # Converting to the output format may perform chroma subsampling.
1368- # Other times, no conversion between YUV and RGB is required.
1371+ assert (
1372+ source_frames .shape == round_trip_frames .shape
1373+ ), f"Shape mismatch: source { source_frames .shape } vs round_trip { round_trip_frames .shape } "
1374+ assert (
1375+ source_frames .dtype == round_trip_frames .dtype
1376+ ), f"Dtype mismatch: source { source_frames .dtype } vs round_trip { round_trip_frames .dtype } "
1377+
1378+ # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels
1379+ # are within a higher tolerance.
13691380 if ffmpeg_version == 6 or format in ("avi" , "flv" ):
1370- atol = 55
1381+ assert_close = partial (assert_tensor_close_on_at_least , percentage = 99 )
1382+ atol = 15
13711383 else :
1384+ assert_close = torch .testing .assert_close
13721385 atol = 2
1373- # TODO-VideoEncoder: Test with FFmpeg's testsrc2 video
13741386 # Check that PSNR for decode(encode(samples)) is above 30
13751387 for s_frame , rt_frame in zip (source_frames , round_trip_frames ):
13761388 res = psnr (s_frame , rt_frame )
13771389 assert res > 30
1378- torch .testing .assert_close (s_frame , rt_frame , atol = atol , rtol = 0 )
1390+ assert_close (s_frame , rt_frame , atol = atol , rtol = 0 )
1391+
@pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available")
@pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv"))
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format):
    """Check that our encoder's output matches the FFmpeg CLI's for the same input.

    The frames decoded from TEST_SRC_2_720P are encoded twice into the
    container given by ``format``: once by invoking the ``ffmpeg`` executable
    on a raw RGB dump of the frames, and once through ``encode_video_to_file``.
    Both outputs are decoded again and compared frame by frame, requiring a
    PSNR above 30 and a minimum percentage of values within ``atol=2``.

    Args:
        tmp_path: pytest-provided temporary directory for all files written.
        format: Container extension under test (parametrized).
    """
    ffmpeg_version = get_ffmpeg_major_version()
    if format == "webm" and ffmpeg_version == 4:
        pytest.skip("Codec for webm is not available in the FFmpeg4 installation.")
    asset = TEST_SRC_2_720P
    source_frames = self.decode(str(asset.path)).data
    frame_rate = 30

    # Encode with FFmpeg CLI.
    # The frames are dumped channels-last so that they can be read back as
    # packed "rgb24" rawvideo. NOTE(review): this assumes source_frames is a
    # uint8 (N, C, H, W) tensor with C == 3 -- confirm against self.decode.
    temp_raw_path = str(tmp_path / "temp_input.raw")
    with open(temp_raw_path, "wb") as f:
        f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes())

    ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}")
    # Test that lossless encoding is identical
    crf = 0
    quality_params = ["-crf", str(crf)]
    # Some codecs (ex. MPEG4) do not support CRF, qscale is used for lossless encoding.
    # Flags not supported by the selected codec will be ignored, so we set both crf and qscale.
    quality_params += ["-q:v", str(crf)]
    ffmpeg_cmd = [
        "ffmpeg",
        "-y",
        "-f",
        "rawvideo",
        "-pix_fmt",
        "rgb24",
        "-s",
        # rawvideo carries no header, so the frame size is passed as WIDTHxHEIGHT.
        f"{source_frames.shape[3]}x{source_frames.shape[2]}",
        "-r",
        str(frame_rate),
        "-i",
        temp_raw_path,
        *quality_params,
        ffmpeg_encoded_path,
    ]
    # check=True turns a non-zero ffmpeg exit status into a test failure.
    subprocess.run(ffmpeg_cmd, check=True)

    # Encode with our video encoder
    encoder_output_path = str(tmp_path / f"encoder_output.{format}")
    encode_video_to_file(
        frames=source_frames,
        frame_rate=frame_rate,
        filename=encoder_output_path,
        crf=crf,
    )

    ffmpeg_frames = self.decode(ffmpeg_encoded_path).data
    encoder_frames = self.decode(encoder_output_path).data

    # Equal frame counts also guarantee that zip() below drops no frames.
    assert ffmpeg_frames.shape[0] == encoder_frames.shape[0]

    # If FFmpeg selects a codec or pixel format that uses qscale (not crf),
    # the VideoEncoder outputs *slightly* different frames.
    # There may be additional subtle differences in the encoder.
    # Compare with ``==``: ``format in ("avi")`` is a substring test against
    # the string "avi" (the parentheses do not make a tuple), not membership.
    percentage = 97 if ffmpeg_version == 6 or format == "avi" else 99

    # Check that PSNR between both encoded versions is high
    for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames):
        res = psnr(ff_frame, enc_frame)
        assert res > 30
        assert_tensor_close_on_at_least(
            ff_frame, enc_frame, percentage=percentage, atol=2
        )
13791458
13801459
13811460if __name__ == "__main__" :
0 commit comments