-
Notifications
You must be signed in to change notification settings - Fork 64
Update Video Encoder and tests for 6 container formats #913
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
56a96fb
f09fe35
5c94fda
f438729
1162beb
ea85cfe
f0fffca
444254e
4bac987
75c5b36
796499e
2cafa10
1aebfec
49d85d6
4ab1b63
5f2928f
2055291
e0e456c
d2b2f14
d7bb786
266f9f5
4516b35
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -153,7 +153,7 @@ class VideoEncoder { | |
|
||
UniqueEncodingAVFormatContext avFormatContext_; | ||
UniqueAVCodecContext avCodecContext_; | ||
int streamIndex_ = -1; | ||
AVStream* avStream_; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIUC we now store avStream_ mostly because we need to access |
||
UniqueSwsContext swsContext_; | ||
|
||
const torch::Tensor frames_; | ||
|
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -9,8 +9,6 @@ | |||
import os | ||||
from functools import partial | ||||
|
||||
from .utils import in_fbcode | ||||
|
||||
os.environ["TORCH_LOGS"] = "output_code" | ||||
import json | ||||
import subprocess | ||||
|
@@ -47,6 +45,10 @@ | |||
from .utils import ( | ||||
all_supported_devices, | ||||
assert_frames_equal, | ||||
assert_tensor_close_on_at_least, | ||||
get_ffmpeg_major_version, | ||||
in_fbcode, | ||||
IS_WINDOWS, | ||||
NASA_AUDIO, | ||||
NASA_AUDIO_MP3, | ||||
NASA_VIDEO, | ||||
|
@@ -55,6 +57,7 @@ | |||
SINE_MONO_S32, | ||||
SINE_MONO_S32_44100, | ||||
SINE_MONO_S32_8000, | ||||
TEST_SRC_2_720P, | ||||
unsplit_device_str, | ||||
) | ||||
|
||||
|
@@ -1381,24 +1384,117 @@ def decode(self, file_path) -> torch.Tensor: | |||
frames, *_ = get_frames_in_range(decoder, start=0, stop=60) | ||||
return frames | ||||
|
||||
@pytest.mark.parametrize("format", ("mov", "mp4", "avi")) | ||||
# TODO-VideoEncoder: enable additional formats (mkv, webm) | ||||
def test_video_encoder_test_round_trip(self, tmp_path, format): | ||||
# TODO-VideoEncoder: Test with FFmpeg's testsrc2 video | ||||
asset = NASA_VIDEO | ||||
|
||||
@pytest.mark.parametrize("format", ("mov", "mp4", "mkv", "webm")) | ||||
def test_video_encoder_round_trip(self, tmp_path, format): | ||||
# Test that decode(encode(decode(asset))) == decode(asset) | ||||
ffmpeg_version = get_ffmpeg_major_version() | ||||
# In FFmpeg6, the default codec's best pixel format is lossy for all container formats but webm. | ||||
# As a result, we skip the round trip test. | ||||
if ffmpeg_version == 6 and format != "webm": | ||||
pytest.skip( | ||||
f"FFmpeg6 defaults to lossy encoding for {format}, skipping round-trip test." | ||||
) | ||||
if format == "webm" and ( | ||||
ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) | ||||
): | ||||
pytest.skip("Codec for webm is not available in this FFmpeg installation.") | ||||
asset = TEST_SRC_2_720P | ||||
source_frames = self.decode(str(asset.path)).data | ||||
|
||||
encoded_path = str(tmp_path / f"encoder_output.{format}") | ||||
frame_rate = 30 # Frame rate is fixed with num frames decoded | ||||
encode_video_to_file(source_frames, frame_rate, encoded_path) | ||||
encode_video_to_file( | ||||
frames=source_frames, frame_rate=frame_rate, filename=encoded_path, crf=0 | ||||
) | ||||
round_trip_frames = self.decode(encoded_path).data | ||||
|
||||
# Check that PSNR for decode(encode(samples)) is above 30 | ||||
assert source_frames.shape == round_trip_frames.shape | ||||
assert source_frames.dtype == round_trip_frames.dtype | ||||
|
||||
# If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels | ||||
# are within a higher tolerance. | ||||
if ffmpeg_version == 6 or format in ("avi", "flv"): | ||||
assert_close = partial(assert_tensor_close_on_at_least, percentage=99) | ||||
atol = 15 | ||||
else: | ||||
assert_close = torch.testing.assert_close | ||||
atol = 2 | ||||
for s_frame, rt_frame in zip(source_frames, round_trip_frames): | ||||
res = psnr(s_frame, rt_frame) | ||||
assert psnr(s_frame, rt_frame) > 30 | ||||
assert_close(s_frame, rt_frame, atol=atol, rtol=0) | ||||
|
||||
@pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's duplicated
Suggested change
|
||||
@pytest.mark.parametrize( | ||||
"format", ("mov", "mp4", "avi", "mkv", "webm", "flv", "gif") | ||||
) | ||||
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): | ||||
ffmpeg_version = get_ffmpeg_major_version() | ||||
if format == "webm": | ||||
if ffmpeg_version == 4: | ||||
pytest.skip( | ||||
"Codec for webm is not available in the FFmpeg4 installation." | ||||
) | ||||
if IS_WINDOWS and ffmpeg_version in (6, 7): | ||||
pytest.skip( | ||||
"Codec for webm is not available in the FFmpeg6/7 installation on Windows." | ||||
) | ||||
asset = TEST_SRC_2_720P | ||||
source_frames = self.decode(str(asset.path)).data | ||||
frame_rate = 30 | ||||
|
||||
# Encode with FFmpeg CLI | ||||
temp_raw_path = str(tmp_path / "temp_input.raw") | ||||
with open(temp_raw_path, "wb") as f: | ||||
f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes()) | ||||
|
||||
ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}") | ||||
crf = 0 | ||||
quality_params = ["-crf", str(crf)] | ||||
# Some codecs (ex. MPEG4) do not support CRF. | ||||
# Flags not supported by the selected codec will be ignored. | ||||
ffmpeg_cmd = [ | ||||
"ffmpeg", | ||||
"-y", | ||||
"-f", | ||||
"rawvideo", | ||||
"-pix_fmt", | ||||
"rgb24", | ||||
"-s", | ||||
f"{source_frames.shape[3]}x{source_frames.shape[2]}", | ||||
"-r", | ||||
str(frame_rate), | ||||
"-i", | ||||
temp_raw_path, | ||||
*quality_params, | ||||
ffmpeg_encoded_path, | ||||
] | ||||
subprocess.run(ffmpeg_cmd, check=True) | ||||
|
||||
# Encode with our video encoder | ||||
encoder_output_path = str(tmp_path / f"encoder_output.{format}") | ||||
encode_video_to_file( | ||||
frames=source_frames, | ||||
frame_rate=frame_rate, | ||||
filename=encoder_output_path, | ||||
crf=crf, | ||||
) | ||||
|
||||
ffmpeg_frames = self.decode(ffmpeg_encoded_path).data | ||||
encoder_frames = self.decode(encoder_output_path).data | ||||
|
||||
assert ffmpeg_frames.shape[0] == encoder_frames.shape[0] | ||||
|
||||
# If FFmpeg selects a codec or pixel format that uses qscale (not crf), | ||||
# the VideoEncoder outputs *slightly* different frames. | ||||
# There may be additional subtle differences in the encoder. | ||||
percentage = 95 if ffmpeg_version == 6 or format in ("avi") else 99 | ||||
|
||||
# Check that PSNR between both encoded versions is high | ||||
for ff_frame, enc_frame in zip(ffmpeg_frames, encoder_frames): | ||||
res = psnr(ff_frame, enc_frame) | ||||
assert res > 30 | ||||
assert_tensor_close_on_at_least( | ||||
ff_frame, enc_frame, percentage=percentage, atol=2 | ||||
) | ||||
|
||||
|
||||
if __name__ == "__main__": | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Above, the
crf
parameter is reused to setqscale
to encode high quality videos in round-trip tests. But, the C++ function only allowscrf
to be set, notqscale
. Sinceqscale
is not needed anywhere else, I did not think it was worth including, but I am open to feedback here.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the context.
My understanding is that for those codecs that do not support crf, we set instead the qscale (quantizer scale) parameter. They both control encoding quality, but in different ways.
I think... we should avoid doing that. I don't have a good enough understanding of how these 2 parameters (and their values!) relate to each other, and I think we can punt on that for a first release of the encoder. Especially since we only really need this workaround for our round-trip test to run. It means we won't be able to do the run-trip tests on those formats, but that's OK: