9
9
import os
10
10
from functools import partial
11
11
12
- from .utils import get_ffmpeg_major_version , in_fbcode , IS_WINDOWS
12
+ from .utils import (
13
+ assert_tensor_close_on_at_least ,
14
+ get_ffmpeg_major_version ,
15
+ in_fbcode ,
16
+ IS_WINDOWS ,
17
+ TEST_SRC_2_720P ,
18
+ )
13
19
14
20
os .environ ["TORCH_LOGS" ] = "output_code"
15
21
import json
@@ -1383,7 +1389,7 @@ def decode(self, file_path) -> torch.Tensor:
1383
1389
return frames
1384
1390
1385
1391
@pytest .mark .parametrize ("format" , ("mov" , "mp4" , "avi" , "mkv" , "webm" , "flv" ))
1386
- def test_video_encoder_test_round_trip (self , tmp_path , format ):
1392
+ def test_video_encoder_round_trip (self , tmp_path , format ):
1387
1393
ffmpeg_version = get_ffmpeg_major_version ()
1388
1394
if format == "webm" :
1389
1395
if ffmpeg_version == 4 :
@@ -1394,7 +1400,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
1394
1400
pytest .skip (
1395
1401
"Codec for webm is not available in the FFmpeg6/7 installation on Windows."
1396
1402
)
1397
- asset = NASA_VIDEO
1403
+ asset = TEST_SRC_2_720P
1398
1404
# Test that decode(encode(decode(asset))) == decode(asset)
1399
1405
source_frames = self .decode (str (asset .path )).data
1400
1406
@@ -1404,20 +1410,93 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
1404
1410
frames = source_frames , frame_rate = frame_rate , filename = encoded_path , crf = 0
1405
1411
)
1406
1412
round_trip_frames = self .decode (encoded_path ).data
1407
-
1408
- # In the cases where a lossy pixel format conversion occurs, higher tolerance is needed.
1409
- # Converting to the output format may perform chroma subsampling.
1410
- # Other times, no conversion between YUV and RGB is required.
1413
+ assert (
1414
+ source_frames .shape == round_trip_frames .shape
1415
+ ), f"Shape mismatch: source { source_frames .shape } vs round_trip { round_trip_frames .shape } "
1416
+ assert (
1417
+ source_frames .dtype == round_trip_frames .dtype
1418
+ ), f"Dtype mismatch: source { source_frames .dtype } vs round_trip { round_trip_frames .dtype } "
1419
+
1420
+ # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels
1421
+ # are within a higher tolerance.
1411
1422
if ffmpeg_version == 6 or format in ("avi" , "flv" ):
1412
- atol = 55
1423
+ assert_close = partial (assert_tensor_close_on_at_least , percentage = 99 )
1424
+ atol = 15
1413
1425
else :
1426
+ assert_close = torch .testing .assert_close
1414
1427
atol = 2
1415
- # TODO-VideoEncoder: Test with FFmpeg's testsrc2 video
1416
1428
# Check that PSNR for decode(encode(samples)) is above 30
1417
1429
for s_frame , rt_frame in zip (source_frames , round_trip_frames ):
1418
1430
res = psnr (s_frame , rt_frame )
1419
1431
assert res > 30
1420
- torch .testing .assert_close (s_frame , rt_frame , atol = atol , rtol = 0 )
1432
+ assert_close (s_frame , rt_frame , atol = atol , rtol = 0 )
1433
+
1434
@pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available")
@pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv"))
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format):
    """Compare our video encoder's output against the FFmpeg CLI's output.

    The frames decoded from ``TEST_SRC_2_720P`` are encoded twice into the
    container given by ``format``: once by invoking the ``ffmpeg`` executable
    on a raw RGB dump of the frames, and once through ``encode_video_to_file``.
    Both files are decoded again and compared frame by frame.

    Args:
        tmp_path: pytest-provided temporary directory; all intermediate and
            output files are written there.
        format: container extension under test (parametrized).

    The test is skipped inside fbcode (no ffmpeg CLI) and for webm when the
    FFmpeg major version is 4.
    """
    ffmpeg_version = get_ffmpeg_major_version()
    if format == "webm" and ffmpeg_version == 4:
        pytest.skip("Codec for webm is not available in the FFmpeg4 installation.")
    asset = TEST_SRC_2_720P
    source_frames = self.decode(str(asset.path)).data
    frame_rate = 30

    # Encode with FFmpeg CLI.
    # The frames are dumped as raw bytes after moving dim 1 to the end, which
    # matches the "rgb24" input pixel format passed to ffmpeg below
    # (presumably source_frames is (N, C, H, W) uint8 -- confirm against decode()).
    temp_raw_path = str(tmp_path / "temp_input.raw")
    with open(temp_raw_path, "wb") as f:
        f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes())

    ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}")
    # Test that lossless encoding is identical
    crf = 0
    # Some codecs (ex. MPEG4) do not support CRF; qscale is used for lossless
    # encoding instead. Flags not supported by the selected codec will be
    # ignored, so we set both crf and qscale.
    quality_params = ["-crf", str(crf), "-q:v", str(crf)]
    ffmpeg_cmd = [
        "ffmpeg",
        "-y",
        "-f",
        "rawvideo",
        "-pix_fmt",
        "rgb24",
        "-s",
        # ffmpeg expects WIDTHxHEIGHT: dim 3 is used as width, dim 2 as height.
        f"{source_frames.shape[3]}x{source_frames.shape[2]}",
        "-r",
        str(frame_rate),
        "-i",
        temp_raw_path,
        *quality_params,
        ffmpeg_encoded_path,
    ]
    # check=True makes a non-zero ffmpeg exit status fail the test immediately.
    subprocess.run(ffmpeg_cmd, check=True)

    # Encode with our video encoder
    encoder_output_path = str(tmp_path / f"encoder_output.{format}")
    encode_video_to_file(
        frames=source_frames,
        frame_rate=frame_rate,
        filename=encoder_output_path,
        crf=crf,
    )

    ffmpeg_frames = self.decode(ffmpeg_encoded_path).data
    encoder_frames = self.decode(encoder_output_path).data

    # zip() below would silently truncate to the shorter sequence, so the
    # frame counts must be checked explicitly first.
    assert ffmpeg_frames.shape[0] == encoder_frames.shape[0], (
        f"Frame count mismatch: ffmpeg {ffmpeg_frames.shape[0]} "
        f"vs encoder {encoder_frames.shape[0]}"
    )

    # If FFmpeg selects a codec or pixel format that uses qscale (not crf),
    # the VideoEncoder outputs *slightly* different frames.
    # There may be additional subtle differences in the encoder.
    # NOTE: compare with == rather than `in ("avi")`; the latter is a
    # substring test on the string "avi", not a tuple membership test.
    percentage = 97 if ffmpeg_version == 6 or format == "avi" else 99

    # Check that PSNR between both encoded versions is high
    for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames):
        res = psnr(ff_frame, enc_frame)
        assert res > 30
        assert_tensor_close_on_at_least(
            ff_frame, enc_frame, percentage=percentage, atol=2
        )
1421
1500
1422
1501
1423
1502
if __name__ == "__main__" :
0 commit comments