Skip to content

Commit f414d0b

Browse files
author
Daniel Flores
committed
compare against cli, high % match
1 parent 72f19ac commit f414d0b

File tree

2 files changed

+106
-12
lines changed

2 files changed

+106
-12
lines changed

src/torchcodec/_core/Encoder.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
#include "src/torchcodec/_core/Encoder.h"
55
#include "torch/types.h"
66

7+
extern "C" {
8+
#include <libavutil/pixdesc.h>
9+
}
10+
711
namespace facebook::torchcodec {
812

913
namespace {
@@ -579,6 +583,7 @@ VideoEncoder::VideoEncoder(
579583

580584
void VideoEncoder::initializeEncoder(
581585
const VideoStreamOptions& videoStreamOptions) {
586+
av_log_set_level(AV_LOG_VERBOSE);
582587
const AVCodec* avCodec =
583588
avcodec_find_encoder(avFormatContext_->oformat->video_codec);
584589
TORCH_CHECK(avCodec != nullptr, "Video codec not found");
@@ -625,12 +630,22 @@ void VideoEncoder::initializeEncoder(
625630

626631
// Apply videoStreamOptions
627632
AVDictionary* options = nullptr;
628-
if (videoStreamOptions.crf.has_value()) {
633+
if (videoStreamOptions.crf.has_value() &&
634+
(avCodec->id != AV_CODEC_ID_MPEG4 && avCodec->id != AV_CODEC_ID_FLV1)) {
629635
av_dict_set(
630636
&options,
631637
"crf",
632638
std::to_string(videoStreamOptions.crf.value()).c_str(),
633639
0);
640+
} else {
641+
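// For codecs that don't support CRF (mpeg4, flv1), use quality-based
642+
// encoding via global_quality + the qscale flag. NOTE(review): as written, this else branch is also taken when crf is unset (any codec), in which case crf.value() below is called with no value present — confirm and guard.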
643+
avCodecContext_->flags |= AV_CODEC_FLAG_QSCALE;
644+
// While qscale is similar to crf, the two are likely not interchangeable.
645+
// Reuse of crf below is only intended to work in VideoEncoder tests where
646+
// crf = 0
647+
avCodecContext_->global_quality =
648+
FF_QP2LAMBDA * videoStreamOptions.crf.value();
634649
}
635650
int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
636651
av_dict_free(&options);
@@ -694,7 +709,7 @@ UniqueAVFrame VideoEncoder::convertTensorToAVFrame(
694709
outWidth_,
695710
outHeight_,
696711
outPixelFormat_,
697-
SWS_BILINEAR,
712+
SWS_BICUBIC, // Used by FFmpeg CLI
698713
nullptr,
699714
nullptr,
700715
nullptr));

test/test_ops.py

Lines changed: 89 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,13 @@
99
import os
1010
from functools import partial
1111

12-
from .utils import get_ffmpeg_major_version, in_fbcode, IS_WINDOWS
12+
from .utils import (
13+
assert_tensor_close_on_at_least,
14+
get_ffmpeg_major_version,
15+
in_fbcode,
16+
IS_WINDOWS,
17+
TEST_SRC_2_720P,
18+
)
1319

1420
os.environ["TORCH_LOGS"] = "output_code"
1521
import json
@@ -1309,7 +1315,7 @@ def decode(self, file_path) -> torch.Tensor:
13091315
return frames
13101316

13111317
@pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv"))
1312-
def test_video_encoder_test_round_trip(self, tmp_path, format):
1318+
def test_video_encoder_round_trip(self, tmp_path, format):
13131319
ffmpeg_version = get_ffmpeg_major_version()
13141320
if format == "webm":
13151321
if ffmpeg_version == 4:
@@ -1320,7 +1326,7 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
13201326
pytest.skip(
13211327
"Codec for webm is not available in the FFmpeg6/7 installation on Windows."
13221328
)
1323-
asset = NASA_VIDEO
1329+
asset = TEST_SRC_2_720P
13241330
# Test that decode(encode(decode(asset))) == decode(asset)
13251331
source_frames = self.decode(str(asset.path)).data
13261332

@@ -1330,20 +1336,93 @@ def test_video_encoder_test_round_trip(self, tmp_path, format):
13301336
frames=source_frames, frame_rate=frame_rate, filename=encoded_path, crf=0
13311337
)
13321338
round_trip_frames = self.decode(encoded_path).data
1333-
1334-
# In the cases where a lossy pixel format conversion occurs, higher tolerance is needed.
1335-
# Converting to the output format may perform chroma subsampling.
1336-
# Other times, no conversion between YUV and RGB is required.
1339+
assert (
1340+
source_frames.shape == round_trip_frames.shape
1341+
), f"Shape mismatch: source {source_frames.shape} vs round_trip {round_trip_frames.shape}"
1342+
assert (
1343+
source_frames.dtype == round_trip_frames.dtype
1344+
), f"Dtype mismatch: source {source_frames.dtype} vs round_trip {round_trip_frames.dtype}"
1345+
1346+
# If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels
1347+
# are within a higher tolerance.
13371348
if ffmpeg_version == 6 or format in ("avi", "flv"):
1338-
atol = 55
1349+
assert_close = partial(assert_tensor_close_on_at_least, percentage=99)
1350+
atol = 15
13391351
else:
1352+
assert_close = torch.testing.assert_close
13401353
atol = 2
1341-
# TODO-VideoEncoder: Test with FFmpeg's testsrc2 video
13421354
# Check that PSNR for decode(encode(samples)) is above 30
13431355
for s_frame, rt_frame in zip(source_frames, round_trip_frames):
13441356
res = psnr(s_frame, rt_frame)
13451357
assert res > 30
1346-
torch.testing.assert_close(s_frame, rt_frame, atol=atol, rtol=0)
1358+
assert_close(s_frame, rt_frame, atol=atol, rtol=0)
1359+
1360+
@pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available")
@pytest.mark.parametrize("format", ("mov", "mp4", "avi", "mkv", "webm", "flv"))
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format):
    """Check that VideoEncoder output closely matches the FFmpeg CLI's output.

    The same decoded frames are encoded twice into the container given by
    ``format``: once by piping raw RGB bytes through the ``ffmpeg`` executable
    and once through ``encode_video_to_file``. Both results are decoded again
    and compared frame by frame (PSNR > 30 and a percentage of values within
    ``atol=2``).
    """
    ffmpeg_version = get_ffmpeg_major_version()
    if format == "webm" and ffmpeg_version == 4:
        pytest.skip("Codec for webm is not available in the FFmpeg4 installation.")
    asset = TEST_SRC_2_720P
    source_frames = self.decode(str(asset.path)).data
    frame_rate = 30

    # Encode with FFmpeg CLI.
    # The rawvideo/rgb24 input below is read as packed pixels, so the channel
    # axis is moved last before dumping the bytes (frames appear to be
    # (N, C, H, W) — the "-s" argument uses shape[3] x shape[2] as W x H).
    temp_raw_path = str(tmp_path / "temp_input.raw")
    with open(temp_raw_path, "wb") as f:
        f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes())

    ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}")
    # Test that lossless encoding is identical
    crf = 0
    quality_params = ["-crf", str(crf)]
    # Some codecs (e.g. MPEG4) do not support CRF; for those, qscale is used
    # instead. Flags not supported by the selected codec will be ignored, so
    # we set both crf and qscale.
    quality_params += ["-q:v", str(crf)]
    ffmpeg_cmd = [
        "ffmpeg",
        "-y",
        "-f",
        "rawvideo",
        "-pix_fmt",
        "rgb24",
        "-s",
        f"{source_frames.shape[3]}x{source_frames.shape[2]}",
        "-r",
        str(frame_rate),
        "-i",
        temp_raw_path,
        *quality_params,
        ffmpeg_encoded_path,
    ]
    subprocess.run(ffmpeg_cmd, check=True)

    # Encode with our video encoder
    encoder_output_path = str(tmp_path / f"encoder_output.{format}")
    encode_video_to_file(
        frames=source_frames,
        frame_rate=frame_rate,
        filename=encoder_output_path,
        crf=crf,
    )

    ffmpeg_frames = self.decode(ffmpeg_encoded_path).data
    encoder_frames = self.decode(encoder_output_path).data

    # zip() below stops at the shorter input, so verify the frame counts
    # match before comparing frame by frame.
    assert ffmpeg_frames.shape[0] == encoder_frames.shape[0]

    # If FFmpeg selects a codec or pixel format that uses qscale (not crf),
    # the VideoEncoder outputs *slightly* different frames.
    # There may be additional subtle differences in the encoder.
    # Note: ("avi",) is a one-element tuple; a bare ("avi") is just the string
    # "avi", which would turn `in` into a substring test.
    percentage = 97 if ffmpeg_version == 6 or format in ("avi",) else 99

    # Check that PSNR between both encoded versions is high
    for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames):
        res = psnr(ff_frame, enc_frame)
        assert res > 30
        assert_tensor_close_on_at_least(
            ff_frame, enc_frame, percentage=percentage, atol=2
        )
13471426

13481427

13491428
if __name__ == "__main__":

0 commit comments

Comments
 (0)