99import os
1010from functools import partial
1111
12- from .utils import get_ffmpeg_major_version , in_fbcode , IS_WINDOWS
12+ from .utils import (
13+ assert_tensor_close_on_at_least ,
14+ get_ffmpeg_major_version ,
15+ in_fbcode ,
16+ IS_WINDOWS ,
17+ TEST_SRC_2_720P ,
18+ )
1319
1420os .environ ["TORCH_LOGS" ] = "output_code"
1521import json
@@ -1309,7 +1315,7 @@ def decode(self, file_path) -> torch.Tensor:
13091315 return frames
13101316
13111317 @pytest .mark .parametrize ("format" , ("mov" , "mp4" , "avi" , "mkv" , "webm" , "flv" ))
1312- def test_video_encoder_test_round_trip (self , tmp_path , format ):
1318+ def test_video_encoder_round_trip (self , tmp_path , format ):
13131319 ffmpeg_version = get_ffmpeg_major_version ()
13141320 if format == "webm" :
13151321 if ffmpeg_version == 4 :
@@ -1320,7 +1326,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
13201326 pytest .skip (
13211327 "Codec for webm is not available in the FFmpeg6/7 installation on Windows."
13221328 )
1323- asset = NASA_VIDEO
1329+ asset = TEST_SRC_2_720P
13241330 # Test that decode(encode(decode(asset))) == decode(asset)
13251331 source_frames = self .decode (str (asset .path )).data
13261332
@@ -1330,20 +1336,93 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
13301336 frames = source_frames , frame_rate = frame_rate , filename = encoded_path , crf = 0
13311337 )
13321338 round_trip_frames = self .decode (encoded_path ).data
1333-
1334- # In the cases where a lossy pixel format conversion occurs, higher tolerance is needed.
1335- # Converting to the output format may perform chroma subsampling.
1336- # Other times, no conversion between YUV and RGB is required.
1339+ assert (
1340+ source_frames .shape == round_trip_frames .shape
1341+ ), f"Shape mismatch: source { source_frames .shape } vs round_trip { round_trip_frames .shape } "
1342+ assert (
1343+ source_frames .dtype == round_trip_frames .dtype
1344+ ), f"Dtype mismatch: source { source_frames .dtype } vs round_trip { round_trip_frames .dtype } "
1345+
1346+ # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels
1347+ # are within a higher tolerance.
13371348 if ffmpeg_version == 6 or format in ("avi" , "flv" ):
1338- atol = 55
1349+ assert_close = partial (assert_tensor_close_on_at_least , percentage = 99 )
1350+ atol = 15
13391351 else :
1352+ assert_close = torch .testing .assert_close
13401353 atol = 2
1341- # TODO-VideoEncoder: Test with FFmpeg's testsrc2 video
13421354 # Check that PSNR for decode(encode(samples)) is above 30
13431355 for s_frame , rt_frame in zip (source_frames , round_trip_frames ):
13441356 res = psnr (s_frame , rt_frame )
13451357 assert res > 30
1346- torch .testing .assert_close (s_frame , rt_frame , atol = atol , rtol = 0 )
1358+ assert_close (s_frame , rt_frame , atol = atol , rtol = 0 )
1359+
@pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available")
@pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv"))
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format):
    """Compare ``encode_video_to_file`` output against the ``ffmpeg`` CLI.

    The same decoded source frames are encoded twice into the container
    given by ``format``: once by piping raw RGB24 bytes through the
    ``ffmpeg`` command line, once through ``encode_video_to_file``. Both
    outputs are decoded again and compared frame by frame: PSNR must be
    above 30 and a minimum percentage of pixels must agree within
    ``atol=2``.

    Args:
        tmp_path: pytest-provided temporary directory for all scratch files.
        format: container extension used for both encoded files.
    """
    ffmpeg_version = get_ffmpeg_major_version()
    # NOTE(review): the round-trip test also skips webm on Windows with
    # FFmpeg 6/7 ("Codec for webm is not available ..."); confirm whether
    # the same skip is needed here.
    if format == "webm" and ffmpeg_version == 4:
        pytest.skip("Codec for webm is not available in the FFmpeg4 installation.")
    asset = TEST_SRC_2_720P
    source_frames = self.decode(str(asset.path)).data
    frame_rate = 30

    # Encode with FFmpeg CLI.
    # The frames are dumped as packed rgb24, so the axis at index 1 is moved
    # last before serialising (presumably (N, C, H, W) -> (N, H, W, C);
    # consistent with shape[3] x shape[2] being passed as the "-s" size below).
    temp_raw_path = str(tmp_path / "temp_input.raw")
    with open(temp_raw_path, "wb") as f:
        f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes())

    ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}")
    # Test that lossless encoding is identical
    crf = 0
    # Some codecs (ex. MPEG4) do not support CRF, qscale is used for lossless encoding.
    # Flags not supported by the selected codec will be ignored, so we set both crf and qscale.
    quality_params = ["-crf", str(crf), "-q:v", str(crf)]
    ffmpeg_cmd = [
        "ffmpeg",
        "-y",
        "-f",
        "rawvideo",
        "-pix_fmt",
        "rgb24",
        "-s",
        f"{source_frames.shape[3]}x{source_frames.shape[2]}",
        "-r",
        str(frame_rate),
        "-i",
        temp_raw_path,
        *quality_params,
        ffmpeg_encoded_path,
    ]
    # check=True makes a failing ffmpeg invocation fail the test immediately.
    subprocess.run(ffmpeg_cmd, check=True)

    # Encode with our video encoder
    encoder_output_path = str(tmp_path / f"encoder_output.{format}")
    encode_video_to_file(
        frames=source_frames,
        frame_rate=frame_rate,
        filename=encoder_output_path,
        crf=crf,
    )

    ffmpeg_frames = self.decode(ffmpeg_encoded_path).data
    encoder_frames = self.decode(encoder_output_path).data

    # Same number of frames; zip() below would otherwise silently truncate.
    assert ffmpeg_frames.shape[0] == encoder_frames.shape[0]

    # If FFmpeg selects a codec or pixel format that uses qscale (not crf),
    # the VideoEncoder outputs *slightly* different frames.
    # There may be additional subtle differences in the encoder.
    # Equality test on purpose: `format in ("avi")` is a substring check on
    # the string "avi", not tuple membership.
    percentage = 97 if ffmpeg_version == 6 or format == "avi" else 99

    # Check that PSNR between both encoded versions is high
    for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames):
        res = psnr(ff_frame, enc_frame)
        assert res > 30
        assert_tensor_close_on_at_least(
            ff_frame, enc_frame, percentage=percentage, atol=2
        )
13471426
13481427
13491428if __name__ == "__main__" :
0 commit comments