Skip to content

Commit 5f2928f

Browse files
author
Daniel Flores
committed
compare against cli, high % match
1 parent 4ab1b63 commit 5f2928f

File tree

2 files changed

+106
-12
lines changed

2 files changed

+106
-12
lines changed

src/torchcodec/_core/Encoder.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
#include "src/torchcodec/_core/Encoder.h"
55
#include "torch/types.h"
66

7+
extern "C" {
8+
#include <libavutil/pixdesc.h>
9+
}
10+
711
namespace facebook::torchcodec {
812

913
namespace {
@@ -579,6 +583,7 @@ VideoEncoder::VideoEncoder(
579583

580584
void VideoEncoder::initializeEncoder(
581585
const VideoStreamOptions& videoStreamOptions) {
586+
av_log_set_level(AV_LOG_VERBOSE);
582587
const AVCodec* avCodec =
583588
avcodec_find_encoder(avFormatContext_->oformat->video_codec);
584589
TORCH_CHECK(avCodec != nullptr, "Video codec not found");
@@ -625,12 +630,22 @@ void VideoEncoder::initializeEncoder(
625630

626631
// Apply videoStreamOptions
627632
AVDictionary* options = nullptr;
628-
if (videoStreamOptions.crf.has_value()) {
633+
if (videoStreamOptions.crf.has_value() &&
634+
(avCodec->id != AV_CODEC_ID_MPEG4 && avCodec->id != AV_CODEC_ID_FLV1)) {
629635
av_dict_set(
630636
&options,
631637
"crf",
632638
std::to_string(videoStreamOptions.crf.value()).c_str(),
633639
0);
640+
} else {
641+
// For codecs that don't support CRF (mpeg4, flv1),
642+
// use quality-based encoding via global_quality + qscale flag
643+
avCodecContext_->flags |= AV_CODEC_FLAG_QSCALE;
644+
// While qscale is similar to crf, it is likely not interchangeable.
645+
// Reuse of crf below is only intended to work in VideoEncoder tests where
646+
// crf = 0
647+
avCodecContext_->global_quality =
648+
FF_QP2LAMBDA * videoStreamOptions.crf.value();
634649
}
635650
int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
636651
av_dict_free(&options);
@@ -694,7 +709,7 @@ UniqueAVFrame VideoEncoder::convertTensorToAVFrame(
694709
outWidth_,
695710
outHeight_,
696711
outPixelFormat_,
697-
SWS_BILINEAR,
712+
SWS_BICUBIC, // Used by FFmpeg CLI
698713
nullptr,
699714
nullptr,
700715
nullptr));

test/test_ops.py

Lines changed: 89 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@
99
import os
1010
from functools import partial
1111

12-
from .utils import get_ffmpeg_major_version, in_fbcode, IS_WINDOWS
12+
from .utils import (
13+
assert_tensor_close_on_at_least,
14+
get_ffmpeg_major_version,
15+
in_fbcode,
16+
IS_WINDOWS,
17+
TEST_SRC_2_720P,
18+
)
1319

1420
os.environ["TORCH_LOGS"] = "output_code"
1521
import json
@@ -1383,7 +1389,7 @@ def decode(self, file_path) -> torch.Tensor:
13831389
return frames
13841390

13851391
@pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv"))
1386-
def test_video_encoder_test_round_trip(self, tmp_path, format):
1392+
def test_video_encoder_round_trip(self, tmp_path, format):
13871393
ffmpeg_version = get_ffmpeg_major_version()
13881394
if format == "webm":
13891395
if ffmpeg_version == 4:
@@ -1394,7 +1400,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
13941400
pytest.skip(
13951401
"Codec for webm is not available in the FFmpeg6/7 installation on Windows."
13961402
)
1397-
asset = NASA_VIDEO
1403+
asset = TEST_SRC_2_720P
13981404
# Test that decode(encode(decode(asset))) == decode(asset)
13991405
source_frames = self.decode(str(asset.path)).data
14001406

@@ -1404,20 +1410,93 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
14041410
frames=source_frames, frame_rate=frame_rate, filename=encoded_path, crf=0
14051411
)
14061412
round_trip_frames = self.decode(encoded_path).data
1407-
1408-
# In the cases where a lossy pixel format conversion occurs, higher tolerance is needed.
1409-
# Converting to the output format may perform chroma subsampling.
1410-
# Other times, no conversion between YUV and RGB is required.
1413+
assert (
1414+
source_frames.shape == round_trip_frames.shape
1415+
), f"Shape mismatch: source {source_frames.shape} vs round_trip {round_trip_frames.shape}"
1416+
assert (
1417+
source_frames.dtype == round_trip_frames.dtype
1418+
), f"Dtype mismatch: source {source_frames.dtype} vs round_trip {round_trip_frames.dtype}"
1419+
1420+
# If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels
1421+
# are within a higher tolerance.
14111422
if ffmpeg_version == 6 or format in ("avi", "flv"):
1412-
atol = 55
1423+
assert_close = partial(assert_tensor_close_on_at_least, percentage=99)
1424+
atol = 15
14131425
else:
1426+
assert_close = torch.testing.assert_close
14141427
atol = 2
1415-
# TODO-VideoEncoder: Test with FFmpeg's testsrc2 video
14161428
# Check that PSNR for decode(encode(samples)) is above 30
14171429
for s_frame, rt_frame in zip(source_frames, round_trip_frames):
14181430
res = psnr(s_frame, rt_frame)
14191431
assert res > 30
1420-
torch.testing.assert_close(s_frame, rt_frame, atol=atol, rtol=0)
1432+
assert_close(s_frame, rt_frame, atol=atol, rtol=0)
1433+
1434+
@pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available")
1435+
@pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv"))
1436+
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format):
1437+
ffmpeg_version = get_ffmpeg_major_version()
1438+
if format == "webm" and ffmpeg_version == 4:
1439+
pytest.skip("Codec for webm is not available in the FFmpeg4 installation.")
1440+
asset = TEST_SRC_2_720P
1441+
source_frames = self.decode(str(asset.path)).data
1442+
frame_rate = 30
1443+
1444+
# Encode with FFmpeg CLI
1445+
temp_raw_path = str(tmp_path / "temp_input.raw")
1446+
with open(temp_raw_path, "wb") as f:
1447+
f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes())
1448+
1449+
ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}")
1450+
# Test that lossless encoding is identical
1451+
crf = 0
1452+
quality_params = ["-crf", str(crf)]
1453+
# Some codecs (ex. MPEG4) do not support CRF, qscale is used for lossless encoding.
1454+
# Flags not supported by the selected codec will be ignored, so we set both crf and qscale.
1455+
quality_params += ["-q:v", str(crf)]
1456+
ffmpeg_cmd = [
1457+
"ffmpeg",
1458+
"-y",
1459+
"-f",
1460+
"rawvideo",
1461+
"-pix_fmt",
1462+
"rgb24",
1463+
"-s",
1464+
f"{source_frames.shape[3]}x{source_frames.shape[2]}",
1465+
"-r",
1466+
str(frame_rate),
1467+
"-i",
1468+
temp_raw_path,
1469+
*quality_params,
1470+
ffmpeg_encoded_path,
1471+
]
1472+
subprocess.run(ffmpeg_cmd, check=True)
1473+
1474+
# Encode with our video encoder
1475+
encoder_output_path = str(tmp_path / f"encoder_output.{format}")
1476+
encode_video_to_file(
1477+
frames=source_frames,
1478+
frame_rate=frame_rate,
1479+
filename=encoder_output_path,
1480+
crf=crf,
1481+
)
1482+
1483+
ffmpeg_frames = self.decode(ffmpeg_encoded_path).data
1484+
encoder_frames = self.decode(encoder_output_path).data
1485+
1486+
assert ffmpeg_frames.shape[0] == encoder_frames.shape[0]
1487+
1488+
# If FFmpeg selects a codec or pixel format that uses qscale (not crf),
1489+
# the VideoEncoder outputs *slightly* different frames.
1490+
# There may be additional subtle differences in the encoder.
1491+
percentage = 97 if ffmpeg_version == 6 or format in ("avi") else 99
1492+
1493+
# Check that PSNR between both encoded versions is high
1494+
for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames):
1495+
res = psnr(ff_frame, enc_frame)
1496+
assert res > 30
1497+
assert_tensor_close_on_at_least(
1498+
ff_frame, enc_frame, percentage=percentage, atol=2
1499+
)
14211500

14221501

14231502
if __name__ == "__main__":

0 commit comments

Comments
 (0)