9
9
import os
10
10
from functools import partial
11
11
12
- from .utils import get_ffmpeg_major_version , in_fbcode , IS_WINDOWS
12
+ from .utils import (
13
+ assert_tensor_close_on_at_least ,
14
+ get_ffmpeg_major_version ,
15
+ in_fbcode ,
16
+ IS_WINDOWS ,
17
+ TEST_SRC_2_720P ,
18
+ )
13
19
14
20
os .environ ["TORCH_LOGS" ] = "output_code"
15
21
import json
@@ -1309,7 +1315,7 @@ def decode(self, file_path) -> torch.Tensor:
1309
1315
return frames
1310
1316
1311
1317
@pytest .mark .parametrize ("format" , ("mov" , "mp4" , "avi" , "mkv" , "webm" , "flv" ))
1312
- def test_video_encoder_test_round_trip (self , tmp_path , format ):
1318
+ def test_video_encoder_round_trip (self , tmp_path , format ):
1313
1319
ffmpeg_version = get_ffmpeg_major_version ()
1314
1320
if format == "webm" :
1315
1321
if ffmpeg_version == 4 :
@@ -1320,7 +1326,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
1320
1326
pytest .skip (
1321
1327
"Codec for webm is not available in the FFmpeg6/7 installation on Windows."
1322
1328
)
1323
- asset = NASA_VIDEO
1329
+ asset = TEST_SRC_2_720P
1324
1330
# Test that decode(encode(decode(asset))) == decode(asset)
1325
1331
source_frames = self .decode (str (asset .path )).data
1326
1332
@@ -1330,20 +1336,93 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
1330
1336
frames = source_frames , frame_rate = frame_rate , filename = encoded_path , crf = 0
1331
1337
)
1332
1338
round_trip_frames = self .decode (encoded_path ).data
1333
-
1334
- # In the cases where a lossy pixel format conversion occurs, higher tolerance is needed.
1335
- # Converting to the output format may perform chroma subsampling.
1336
- # Other times, no conversion between YUV and RGB is required.
1339
+ assert (
1340
+ source_frames .shape == round_trip_frames .shape
1341
+ ), f"Shape mismatch: source { source_frames .shape } vs round_trip { round_trip_frames .shape } "
1342
+ assert (
1343
+ source_frames .dtype == round_trip_frames .dtype
1344
+ ), f"Dtype mismatch: source { source_frames .dtype } vs round_trip { round_trip_frames .dtype } "
1345
+
1346
+ # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels
1347
+ # are within a higher tolerance.
1337
1348
if ffmpeg_version == 6 or format in ("avi" , "flv" ):
1338
- atol = 55
1349
+ assert_close = partial (assert_tensor_close_on_at_least , percentage = 99 )
1350
+ atol = 15
1339
1351
else :
1352
+ assert_close = torch .testing .assert_close
1340
1353
atol = 2
1341
- # TODO-VideoEncoder: Test with FFmpeg's testsrc2 video
1342
1354
# Check that PSNR for decode(encode(samples)) is above 30
1343
1355
for s_frame , rt_frame in zip (source_frames , round_trip_frames ):
1344
1356
res = psnr (s_frame , rt_frame )
1345
1357
assert res > 30
1346
- torch .testing .assert_close (s_frame , rt_frame , atol = atol , rtol = 0 )
1358
+ assert_close (s_frame , rt_frame , atol = atol , rtol = 0 )
1359
+
1360
@pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available")
@pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv"))
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format):
    """Compare our video encoder's output against the FFmpeg CLI's output.

    The same source frames are encoded twice into ``format``: once by
    invoking the ``ffmpeg`` executable on a raw RGB dump of the frames, and
    once through ``encode_video_to_file``. Both files are decoded again and
    compared frame by frame (PSNR > 30, and a given percentage of values
    within ``atol=2``).

    Args:
        tmp_path: pytest-provided temporary directory for all artifacts.
        format: container extension under test (parametrized).
    """
    ffmpeg_version = get_ffmpeg_major_version()
    if format == "webm" and ffmpeg_version == 4:
        pytest.skip("Codec for webm is not available in the FFmpeg4 installation.")
    asset = TEST_SRC_2_720P
    source_frames = self.decode(str(asset.path)).data
    frame_rate = 30

    # Encode with FFmpeg CLI
    # Dump the frames channel-last so the bytes can be fed to the rawvideo
    # demuxer as rgb24.
    # NOTE(review): assumes source_frames is a uint8 (N, C, H, W) tensor —
    # confirm against self.decode().
    temp_raw_path = str(tmp_path / "temp_input.raw")
    with open(temp_raw_path, "wb") as f:
        f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes())

    ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}")
    # Test that lossless encoding is identical
    crf = 0
    # Some codecs (ex. MPEG4) do not support CRF, qscale is used for lossless encoding.
    # Flags not supported by the selected codec will be ignored, so we set both crf and qscale.
    quality_params = ["-crf", str(crf), "-q:v", str(crf)]
    ffmpeg_cmd = [
        "ffmpeg",
        "-y",
        "-f",
        "rawvideo",
        "-pix_fmt",
        "rgb24",
        "-s",
        # FFmpeg expects WIDTHxHEIGHT.
        f"{source_frames.shape[3]}x{source_frames.shape[2]}",
        "-r",
        str(frame_rate),
        "-i",
        temp_raw_path,
        *quality_params,
        ffmpeg_encoded_path,
    ]
    # check=True: a failing ffmpeg invocation should fail the test right here
    # rather than surface later as a confusing decode error.
    subprocess.run(ffmpeg_cmd, check=True)

    # Encode with our video encoder
    encoder_output_path = str(tmp_path / f"encoder_output.{format}")
    encode_video_to_file(
        frames=source_frames,
        frame_rate=frame_rate,
        filename=encoder_output_path,
        crf=crf,
    )

    ffmpeg_frames = self.decode(ffmpeg_encoded_path).data
    encoder_frames = self.decode(encoder_output_path).data

    # zip() below silently truncates to the shorter sequence, so make sure
    # both outputs contain the same number of frames first.
    assert ffmpeg_frames.shape[0] == encoder_frames.shape[0]

    # If FFmpeg selects a codec or pixel format that uses qscale (not crf),
    # the VideoEncoder outputs *slightly* different frames.
    # There may be additional subtle differences in the encoder.
    # Plain equality on purpose: `format in ("avi")` would be a substring
    # test against the string "avi", not a tuple membership test.
    percentage = 97 if ffmpeg_version == 6 or format == "avi" else 99

    # Check that PSNR between both encoded versions is high
    for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames):
        res = psnr(ff_frame, enc_frame)
        assert res > 30
        assert_tensor_close_on_at_least(
            ff_frame, enc_frame, percentage=percentage, atol=2
        )
1347
1426
1348
1427
1349
1428
if __name__ == "__main__" :
0 commit comments