Skip to content

Commit 206d4e4

Browse files
author
Daniel Flores
committed
compare against cli, high % match
1 parent 6e70e8f commit 206d4e4

File tree

2 files changed

+106
-12
lines changed

2 files changed

+106
-12
lines changed

src/torchcodec/_core/Encoder.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
#include "src/torchcodec/_core/Encoder.h"
55
#include "torch/types.h"
66

7+
extern "C" {
8+
#include <libavutil/pixdesc.h>
9+
}
10+
711
namespace facebook::torchcodec {
812

913
namespace {
@@ -579,6 +583,7 @@ VideoEncoder::VideoEncoder(
579583

580584
void VideoEncoder::initializeEncoder(
581585
const VideoStreamOptions& videoStreamOptions) {
586+
av_log_set_level(AV_LOG_VERBOSE);
582587
const AVCodec* avCodec =
583588
avcodec_find_encoder(avFormatContext_->oformat->video_codec);
584589
TORCH_CHECK(avCodec != nullptr, "Video codec not found");
@@ -625,12 +630,22 @@ void VideoEncoder::initializeEncoder(
625630

626631
// Apply videoStreamOptions
627632
AVDictionary* options = nullptr;
628-
if (videoStreamOptions.crf.has_value()) {
633+
if (videoStreamOptions.crf.has_value() &&
634+
(avCodec->id != AV_CODEC_ID_MPEG4 && avCodec->id != AV_CODEC_ID_FLV1)) {
629635
av_dict_set(
630636
&options,
631637
"crf",
632638
std::to_string(videoStreamOptions.crf.value()).c_str(),
633639
0);
640+
} else {
641+
// For codecs that don't support CRF (mpeg4, flv1),
642+
// use quality-based encoding via global_quality + qscale flag
643+
avCodecContext_->flags |= AV_CODEC_FLAG_QSCALE;
644+
// While qscale is similar to crf, the two are likely not interchangeable.
645+
// Reuse of crf below is only intended to work in VideoEncoder tests where
646+
// crf = 0
647+
avCodecContext_->global_quality =
648+
FF_QP2LAMBDA * videoStreamOptions.crf.value();
634649
}
635650
int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
636651
av_dict_free(&options);
@@ -694,7 +709,7 @@ UniqueAVFrame VideoEncoder::convertTensorToAVFrame(
694709
outWidth_,
695710
outHeight_,
696711
outPixelFormat_,
697-
SWS_BILINEAR,
712+
SWS_BICUBIC, // Used by FFmpeg CLI
698713
nullptr,
699714
nullptr,
700715
nullptr));

test/test_ops.py

Lines changed: 89 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@
99
import os
1010
from functools import partial
1111

12-
from .utils import get_ffmpeg_major_version, in_fbcode, IS_WINDOWS
12+
from .utils import (
13+
assert_tensor_close_on_at_least,
14+
get_ffmpeg_major_version,
15+
in_fbcode,
16+
IS_WINDOWS,
17+
TEST_SRC_2_720P,
18+
)
1319

1420
os.environ["TORCH_LOGS"] = "output_code"
1521
import json
@@ -1341,7 +1347,7 @@ def decode(self, file_path) -> torch.Tensor:
13411347
return frames
13421348

13431349
@pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv"))
1344-
def test_video_encoder_test_round_trip(self, tmp_path, format):
1350+
def test_video_encoder_round_trip(self, tmp_path, format):
13451351
ffmpeg_version = get_ffmpeg_major_version()
13461352
if format == "webm":
13471353
if ffmpeg_version == 4:
@@ -1352,7 +1358,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
13521358
pytest.skip(
13531359
"Codec for webm is not available in the FFmpeg6/7 installation on Windows."
13541360
)
1355-
asset = NASA_VIDEO
1361+
asset = TEST_SRC_2_720P
13561362
# Test that decode(encode(decode(asset))) == decode(asset)
13571363
source_frames = self.decode(str(asset.path)).data
13581364

@@ -1362,20 +1368,93 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
13621368
frames=source_frames, frame_rate=frame_rate, filename=encoded_path, crf=0
13631369
)
13641370
round_trip_frames = self.decode(encoded_path).data
1365-
1366-
# In the cases where a lossy pixel format conversion occurs, higher tolerance is needed.
1367-
# Converting to the output format may perform chroma subsampling.
1368-
# Other times, no conversion between YUV and RGB is required.
1371+
assert (
1372+
source_frames.shape == round_trip_frames.shape
1373+
), f"Shape mismatch: source {source_frames.shape} vs round_trip {round_trip_frames.shape}"
1374+
assert (
1375+
source_frames.dtype == round_trip_frames.dtype
1376+
), f"Dtype mismatch: source {source_frames.dtype} vs round_trip {round_trip_frames.dtype}"
1377+
1378+
# If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels
1379+
# are within a higher tolerance.
13691380
if ffmpeg_version == 6 or format in ("avi", "flv"):
1370-
atol = 55
1381+
assert_close = partial(assert_tensor_close_on_at_least, percentage=99)
1382+
atol = 15
13711383
else:
1384+
assert_close = torch.testing.assert_close
13721385
atol = 2
1373-
# TODO-VideoEncoder: Test with FFmpeg's testsrc2 video
13741386
# Check that PSNR for decode(encode(samples)) is above 30
13751387
for s_frame, rt_frame in zip(source_frames, round_trip_frames):
13761388
res = psnr(s_frame, rt_frame)
13771389
assert res > 30
1378-
torch.testing.assert_close(s_frame, rt_frame, atol=atol, rtol=0)
1390+
assert_close(s_frame, rt_frame, atol=atol, rtol=0)
1391+
1392+
@pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available")
1393+
@pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv"))
1394+
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format):
1395+
ffmpeg_version = get_ffmpeg_major_version()
1396+
if format == "webm" and ffmpeg_version == 4:
1397+
pytest.skip("Codec for webm is not available in the FFmpeg4 installation.")
1398+
asset = TEST_SRC_2_720P
1399+
source_frames = self.decode(str(asset.path)).data
1400+
frame_rate = 30
1401+
1402+
# Encode with FFmpeg CLI
1403+
temp_raw_path = str(tmp_path / "temp_input.raw")
1404+
with open(temp_raw_path, "wb") as f:
1405+
f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes())
1406+
1407+
ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}")
1408+
# Use lossless settings so that the FFmpeg CLI and VideoEncoder outputs should be identical
1409+
crf = 0
1410+
quality_params = ["-crf", str(crf)]
1411+
# Some codecs (e.g. MPEG4) do not support CRF; for those, qscale is used for lossless encoding.
1412+
# Flags not supported by the selected codec will be ignored, so we set both crf and qscale.
1413+
quality_params += ["-q:v", str(crf)]
1414+
ffmpeg_cmd = [
1415+
"ffmpeg",
1416+
"-y",
1417+
"-f",
1418+
"rawvideo",
1419+
"-pix_fmt",
1420+
"rgb24",
1421+
"-s",
1422+
f"{source_frames.shape[3]}x{source_frames.shape[2]}",
1423+
"-r",
1424+
str(frame_rate),
1425+
"-i",
1426+
temp_raw_path,
1427+
*quality_params,
1428+
ffmpeg_encoded_path,
1429+
]
1430+
subprocess.run(ffmpeg_cmd, check=True)
1431+
1432+
# Encode with our video encoder
1433+
encoder_output_path = str(tmp_path / f"encoder_output.{format}")
1434+
encode_video_to_file(
1435+
frames=source_frames,
1436+
frame_rate=frame_rate,
1437+
filename=encoder_output_path,
1438+
crf=crf,
1439+
)
1440+
1441+
ffmpeg_frames = self.decode(ffmpeg_encoded_path).data
1442+
encoder_frames = self.decode(encoder_output_path).data
1443+
1444+
assert ffmpeg_frames.shape[0] == encoder_frames.shape[0]
1445+
1446+
# If FFmpeg selects a codec or pixel format that uses qscale (not crf),
1447+
# the VideoEncoder outputs *slightly* different frames.
1448+
# There may be additional subtle differences in the encoder.
1449+
percentage = 97 if ffmpeg_version == 6 or format in ("avi") else 99
1450+
1451+
# Check that PSNR between both encoded versions is high
1452+
for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames):
1453+
res = psnr(ff_frame, enc_frame)
1454+
assert res > 30
1455+
assert_tensor_close_on_at_least(
1456+
ff_frame, enc_frame, percentage=percentage, atol=2
1457+
)
13791458

13801459

13811460
if __name__ == "__main__":

0 commit comments

Comments
 (0)