-
Notifications
You must be signed in to change notification settings - Fork 75
Update Video Encoder and tests for 6 container formats #913
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 25 commits
56a96fb
f09fe35
5c94fda
f438729
1162beb
ea85cfe
f0fffca
444254e
4bac987
75c5b36
796499e
2cafa10
1aebfec
49d85d6
4ab1b63
5f2928f
2055291
e0e456c
d2b2f14
d7bb786
266f9f5
4516b35
163c5c2
d4017bd
185c656
4cc2eb3
b13b5db
160d449
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -4,6 +4,10 @@ | |||||
| #include "src/torchcodec/_core/Encoder.h" | ||||||
| #include "torch/types.h" | ||||||
|
|
||||||
| extern "C" { | ||||||
| #include <libavutil/pixdesc.h> | ||||||
| } | ||||||
|
|
||||||
| namespace facebook::torchcodec { | ||||||
|
|
||||||
| namespace { | ||||||
|
|
@@ -587,15 +591,6 @@ void VideoEncoder::initializeEncoder( | |||||
| TORCH_CHECK(avCodecContext != nullptr, "Couldn't allocate codec context."); | ||||||
| avCodecContext_.reset(avCodecContext); | ||||||
|
|
||||||
| // Set encoding options | ||||||
| // TODO-VideoEncoder: Allow bitrate to be set | ||||||
| std::optional<int> desiredBitRate = videoStreamOptions.bitRate; | ||||||
| if (desiredBitRate.has_value()) { | ||||||
| TORCH_CHECK( | ||||||
| *desiredBitRate >= 0, "bit_rate=", *desiredBitRate, " must be >= 0."); | ||||||
| } | ||||||
| avCodecContext_->bit_rate = desiredBitRate.value_or(0); | ||||||
|
|
||||||
| // Store dimension order and input pixel format | ||||||
| // TODO-VideoEncoder: Remove assumption that tensor in NCHW format | ||||||
| auto sizes = frames_.sizes(); | ||||||
|
|
@@ -608,9 +603,15 @@ void VideoEncoder::initializeEncoder( | |||||
| outWidth_ = inWidth_; | ||||||
| outHeight_ = inHeight_; | ||||||
|
|
||||||
| // Use YUV420P as default output format | ||||||
| // TODO-VideoEncoder: Enable other pixel formats | ||||||
| outPixelFormat_ = AV_PIX_FMT_YUV420P; | ||||||
| // Let FFmpeg choose best pixel format to minimize loss | ||||||
| outPixelFormat_ = avcodec_find_best_pix_fmt_of_list( | ||||||
| getSupportedPixelFormats(*avCodec), // List of supported formats | ||||||
| AV_PIX_FMT_GBRP, // We reorder input to GBRP currently | ||||||
| 0, // No alpha channel | ||||||
| 0 // Discard conversion loss information | ||||||
| ); | ||||||
| TORCH_CHECK(outPixelFormat_ != -1, "Failed to find best pix fmt") | ||||||
|
|
||||||
| // Configure codec parameters | ||||||
| avCodecContext_->codec_id = avCodec->id; | ||||||
|
|
@@ -621,37 +622,39 @@ void VideoEncoder::initializeEncoder( | |||||
| avCodecContext_->time_base = {1, inFrameRate_}; | ||||||
| avCodecContext_->framerate = {inFrameRate_, 1}; | ||||||
|
|
||||||
| // TODO-VideoEncoder: Allow GOP size and max B-frames to be set | ||||||
| if (videoStreamOptions.gopSize.has_value()) { | ||||||
| avCodecContext_->gop_size = *videoStreamOptions.gopSize; | ||||||
| } else { | ||||||
| avCodecContext_->gop_size = 12; // Default GOP size | ||||||
| // Set flag for containers that require extradata to be in the codec context | ||||||
| if (avFormatContext_->oformat->flags & AVFMT_GLOBALHEADER) { | ||||||
| avCodecContext_->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; | ||||||
| } | ||||||
|
|
||||||
| if (videoStreamOptions.maxBFrames.has_value()) { | ||||||
| avCodecContext_->max_b_frames = *videoStreamOptions.maxBFrames; | ||||||
| } else { | ||||||
| avCodecContext_->max_b_frames = 0; // No max B-frames to reduce compression | ||||||
| // Apply videoStreamOptions | ||||||
| AVDictionary* options = nullptr; | ||||||
| if (videoStreamOptions.crf.has_value()) { | ||||||
| av_dict_set( | ||||||
| &options, | ||||||
| "crf", | ||||||
| std::to_string(videoStreamOptions.crf.value()).c_str(), | ||||||
| 0); | ||||||
| } | ||||||
| int status = avcodec_open2(avCodecContext_.get(), avCodec, &options); | ||||||
| av_dict_free(&options); | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess this is OK, if we start using torchcodec/src/torchcodec/_core/FFMPEGCommon.h Lines 74 to 75 in e5b2eef
|
||||||
|
|
||||||
| int status = avcodec_open2(avCodecContext_.get(), avCodec, nullptr); | ||||||
| TORCH_CHECK( | ||||||
| status == AVSUCCESS, | ||||||
| "avcodec_open2 failed: ", | ||||||
| getFFMPEGErrorStringFromErrorCode(status)); | ||||||
|
|
||||||
| AVStream* avStream = avformat_new_stream(avFormatContext_.get(), nullptr); | ||||||
| TORCH_CHECK(avStream != nullptr, "Couldn't create new stream."); | ||||||
| avStream_ = avformat_new_stream(avFormatContext_.get(), nullptr); | ||||||
| TORCH_CHECK(avStream_ != nullptr, "Couldn't create new stream."); | ||||||
|
|
||||||
| // Set the stream time base to encode correct frame timestamps | ||||||
| avStream->time_base = avCodecContext_->time_base; | ||||||
| avStream_->time_base = avCodecContext_->time_base; | ||||||
| status = avcodec_parameters_from_context( | ||||||
| avStream->codecpar, avCodecContext_.get()); | ||||||
| avStream_->codecpar, avCodecContext_.get()); | ||||||
| TORCH_CHECK( | ||||||
| status == AVSUCCESS, | ||||||
| "avcodec_parameters_from_context failed: ", | ||||||
| getFFMPEGErrorStringFromErrorCode(status)); | ||||||
| streamIndex_ = avStream->index; | ||||||
| } | ||||||
|
|
||||||
| void VideoEncoder::encode() { | ||||||
|
|
@@ -694,7 +697,7 @@ UniqueAVFrame VideoEncoder::convertTensorToAVFrame( | |||||
| outWidth_, | ||||||
| outHeight_, | ||||||
| outPixelFormat_, | ||||||
| SWS_BILINEAR, | ||||||
| SWS_BICUBIC, // Used by FFmpeg CLI | ||||||
| nullptr, | ||||||
| nullptr, | ||||||
| nullptr)); | ||||||
|
|
@@ -757,7 +760,7 @@ void VideoEncoder::encodeFrame( | |||||
| "Error while sending frame: ", | ||||||
| getFFMPEGErrorStringFromErrorCode(status)); | ||||||
|
|
||||||
| while (true) { | ||||||
| while (status >= 0) { | ||||||
| ReferenceAVPacket packet(autoAVPacket); | ||||||
| status = avcodec_receive_packet(avCodecContext_.get(), packet.get()); | ||||||
| if (status == AVERROR(EAGAIN) || status == AVERROR_EOF) { | ||||||
|
|
@@ -776,7 +779,16 @@ void VideoEncoder::encodeFrame( | |||||
| "Error receiving packet: ", | ||||||
| getFFMPEGErrorStringFromErrorCode(status)); | ||||||
|
|
||||||
| packet->stream_index = streamIndex_; | ||||||
| // The code below is borrowed from torchaudio: | ||||||
| // https://github.com/pytorch/audio/blob/b6a3368a45aaafe05f1a6a9f10c68adc5e944d9e/src/libtorio/ffmpeg/stream_writer/encoder.cpp#L46 | ||||||
| // Setting packet->duration to 1 allows the last frame to be properly | ||||||
| // encoded, and needs to be set before calling av_packet_rescale_ts. | ||||||
| if (packet->duration == 0) { | ||||||
| packet->duration = 1; | ||||||
| } | ||||||
| av_packet_rescale_ts( | ||||||
| packet.get(), avCodecContext_->time_base, avStream_->time_base); | ||||||
| packet->stream_index = avStream_->index; | ||||||
|
|
||||||
| status = av_interleaved_write_frame(avFormatContext_.get(), packet.get()); | ||||||
| TORCH_CHECK( | ||||||
|
|
||||||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -9,8 +9,6 @@ | |||||
| import os | ||||||
| from functools import partial | ||||||
|
|
||||||
| from .utils import in_fbcode | ||||||
|
|
||||||
| os.environ["TORCH_LOGS"] = "output_code" | ||||||
| import json | ||||||
| import subprocess | ||||||
|
|
@@ -47,6 +45,10 @@ | |||||
| from .utils import ( | ||||||
| all_supported_devices, | ||||||
| assert_frames_equal, | ||||||
| assert_tensor_close_on_at_least, | ||||||
| get_ffmpeg_major_version, | ||||||
| in_fbcode, | ||||||
| IS_WINDOWS, | ||||||
| NASA_AUDIO, | ||||||
| NASA_AUDIO_MP3, | ||||||
| NASA_VIDEO, | ||||||
|
|
@@ -55,6 +57,7 @@ | |||||
| SINE_MONO_S32, | ||||||
| SINE_MONO_S32_44100, | ||||||
| SINE_MONO_S32_8000, | ||||||
| TEST_SRC_2_720P, | ||||||
| unsplit_device_str, | ||||||
| ) | ||||||
|
|
||||||
|
|
@@ -1375,30 +1378,123 @@ def test_bad_input(self, tmp_path): | |||||
| filename="./bad/path.mp3", | ||||||
| ) | ||||||
|
|
||||||
| def decode(self, file_path) -> torch.Tensor: | ||||||
| def decode(self, file_path, device="cpu") -> torch.Tensor: | ||||||
|
||||||
| decoder = create_from_file(str(file_path), seek_mode="approximate") | ||||||
| add_video_stream(decoder) | ||||||
| add_video_stream(decoder, device=device) | ||||||
| frames, *_ = get_frames_in_range(decoder, start=0, stop=60) | ||||||
| return frames | ||||||
|
|
||||||
| @pytest.mark.parametrize("format", ("mov", "mp4", "avi")) | ||||||
| # TODO-VideoEncoder: enable additional formats (mkv, webm) | ||||||
| def test_video_encoder_test_round_trip(self, tmp_path, format): | ||||||
| # TODO-VideoEncoder: Test with FFmpeg's testsrc2 video | ||||||
| asset = NASA_VIDEO | ||||||
|
|
||||||
| @pytest.mark.parametrize("format", ("mov", "mp4", "mkv", "webm")) | ||||||
| def test_video_encoder_round_trip(self, tmp_path, format): | ||||||
| # Test that decode(encode(decode(asset))) == decode(asset) | ||||||
| source_frames = self.decode(str(asset.path)).data | ||||||
| ffmpeg_version = get_ffmpeg_major_version() | ||||||
| # In FFmpeg6, the default codec's best pixel format is lossy for all container formats but webm. | ||||||
| # As a result, we skip the round trip test. | ||||||
| if ffmpeg_version == 6 and format != "webm": | ||||||
| pytest.skip( | ||||||
| f"FFmpeg6 defaults to lossy encoding for {format}, skipping round-trip test." | ||||||
| ) | ||||||
| if format == "webm" and ( | ||||||
| ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7)) | ||||||
| ): | ||||||
| pytest.skip("Codec for webm is not available in this FFmpeg installation.") | ||||||
| asset = TEST_SRC_2_720P | ||||||
| source_frames = self.decode(str(asset.path), device="cpu").data | ||||||
|
|
||||||
| encoded_path = str(tmp_path / f"encoder_output.{format}") | ||||||
| frame_rate = 30 # Frame rate is fixed with num frames decoded | ||||||
| encode_video_to_file(source_frames, frame_rate, encoded_path) | ||||||
| round_trip_frames = self.decode(encoded_path).data | ||||||
|
|
||||||
| # Check that PSNR for decode(encode(samples)) is above 30 | ||||||
| encode_video_to_file( | ||||||
| frames=source_frames, frame_rate=frame_rate, filename=encoded_path, crf=0 | ||||||
| ) | ||||||
| round_trip_frames = self.decode(encoded_path, device="cpu").data | ||||||
| assert source_frames.shape == round_trip_frames.shape | ||||||
| assert source_frames.dtype == round_trip_frames.dtype | ||||||
|
|
||||||
| # If FFmpeg selects a codec or pixel format that does lossy encoding, assert 99% of pixels | ||||||
| # are within a higher tolerance. | ||||||
| if ffmpeg_version == 6: | ||||||
| assert_close = partial(assert_tensor_close_on_at_least, percentage=99) | ||||||
| atol = 15 | ||||||
| else: | ||||||
| assert_close = torch.testing.assert_close | ||||||
| atol = 2 | ||||||
| for s_frame, rt_frame in zip(source_frames, round_trip_frames): | ||||||
| res = psnr(s_frame, rt_frame) | ||||||
| assert psnr(s_frame, rt_frame) > 30 | ||||||
| assert_close(s_frame, rt_frame, atol=atol, rtol=0) | ||||||
|
|
||||||
| @pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available") | ||||||
NicolasHug marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| @pytest.mark.parametrize( | ||||||
| "format", ("mov", "mp4", "avi", "mkv", "webm", "flv", "gif") | ||||||
| ) | ||||||
| def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format): | ||||||
| ffmpeg_version = get_ffmpeg_major_version() | ||||||
| if format == "webm": | ||||||
| if ffmpeg_version == 4: | ||||||
| pytest.skip( | ||||||
| "Codec for webm is not available in the FFmpeg4 installation." | ||||||
| ) | ||||||
| if IS_WINDOWS and ffmpeg_version in (6, 7): | ||||||
| pytest.skip( | ||||||
| "Codec for webm is not available in the FFmpeg6/7 installation on Windows." | ||||||
| ) | ||||||
| asset = TEST_SRC_2_720P | ||||||
| source_frames = self.decode(str(asset.path)).data | ||||||
| frame_rate = 30 | ||||||
|
|
||||||
| # Encode with FFmpeg CLI | ||||||
| temp_raw_path = str(tmp_path / "temp_input.raw") | ||||||
| with open(temp_raw_path, "wb") as f: | ||||||
| f.write(source_frames.permute(0, 2, 3, 1).cpu().numpy().tobytes()) | ||||||
|
|
||||||
| ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}") | ||||||
| crf = 0 | ||||||
| quality_params = ["-crf", str(crf)] | ||||||
| # Some codecs (ex. MPEG4) do not support CRF. | ||||||
| # Flags not supported by the selected codec will be ignored. | ||||||
| ffmpeg_cmd = [ | ||||||
| "ffmpeg", | ||||||
| "-y", | ||||||
| "-f", | ||||||
| "rawvideo", | ||||||
| "-pix_fmt", | ||||||
| "rgb24", | ||||||
| "-s", | ||||||
| f"{source_frames.shape[3]}x{source_frames.shape[2]}", | ||||||
| "-r", | ||||||
| str(frame_rate), | ||||||
| "-i", | ||||||
| temp_raw_path, | ||||||
| *quality_params, | ||||||
| ffmpeg_encoded_path, | ||||||
| ] | ||||||
| subprocess.run(ffmpeg_cmd, check=True) | ||||||
|
|
||||||
| # Encode with our video encoder | ||||||
| encoder_output_path = str(tmp_path / f"encoder_output.{format}") | ||||||
| encode_video_to_file( | ||||||
| frames=source_frames, | ||||||
| frame_rate=frame_rate, | ||||||
| filename=encoder_output_path, | ||||||
| crf=crf, | ||||||
| ) | ||||||
|
|
||||||
| ffmpeg_frames = self.decode(ffmpeg_encoded_path).data | ||||||
| encoder_frames = self.decode(encoder_output_path).data | ||||||
|
|
||||||
| assert ffmpeg_frames.shape[0] == encoder_frames.shape[0] | ||||||
|
|
||||||
| # If FFmpeg selects a codec or pixel format that uses qscale (not crf), | ||||||
| # the VideoEncoder outputs *slightly* different frames. | ||||||
| # There may be additional subtle differences in the encoder. | ||||||
| percentage = 94 if ffmpeg_version == 6 or format in ("avi") else 99 | ||||||
|
||||||
| percentage = 94 if ffmpeg_version == 6 or format in ("avi") else 99 | |
| percentage = 94 if ffmpeg_version == 6 or format == "avi" else 99 |
Uh oh!
There was an error while loading. Please reload this page.