-
Notifications
You must be signed in to change notification settings - Fork 74
Add codec options to VideoEncoder API #1050
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
1d79594
e4d3ede
e6fd72b
888c8d4
5629e6e
36a2c41
7548687
f933cef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -570,10 +570,10 @@ AVPixelFormat validatePixelFormat( | |||||
| TORCH_CHECK(false, errorMsg.str()); | ||||||
| } | ||||||
|
|
||||||
| void validateDoubleOption( | ||||||
| void tryToValidateCodecOption( | ||||||
| const AVCodec& avCodec, | ||||||
| const char* optionName, | ||||||
| double value) { | ||||||
| const std::string& value) { | ||||||
| if (!avCodec.priv_class) { | ||||||
| return; | ||||||
| } | ||||||
|
|
@@ -586,24 +586,36 @@ void validateDoubleOption( | |||||
| 0, | ||||||
| AV_OPT_SEARCH_FAKE_OBJ, | ||||||
| nullptr); | ||||||
| // If the option was not found, let FFmpeg handle it later | ||||||
| // If option is not found we cannot validate it, let FFmpeg handle it | ||||||
| if (!option) { | ||||||
| return; | ||||||
| } | ||||||
| // Validate options defined as a numeric type | ||||||
| if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 || | ||||||
| option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) { | ||||||
| TORCH_CHECK( | ||||||
| value >= option->min && value <= option->max, | ||||||
| optionName, | ||||||
| "=", | ||||||
| value, | ||||||
| " is out of valid range [", | ||||||
| option->min, | ||||||
| ", ", | ||||||
| option->max, | ||||||
| "] for this codec. For more details, run 'ffmpeg -h encoder=", | ||||||
| avCodec.name, | ||||||
| "'"); | ||||||
| try { | ||||||
| double numericValue = std::stod(value); | ||||||
| TORCH_CHECK( | ||||||
| numericValue >= option->min && numericValue <= option->max, | ||||||
| optionName, | ||||||
| "=", | ||||||
| numericValue, | ||||||
| " is out of valid range [", | ||||||
| option->min, | ||||||
| ", ", | ||||||
| option->max, | ||||||
| "] for this codec. For more details, run 'ffmpeg -h encoder=", | ||||||
| avCodec.name, | ||||||
| "'"); | ||||||
| } catch (const std::invalid_argument& e) { | ||||||
| TORCH_CHECK( | ||||||
| false, | ||||||
| "Option ", | ||||||
| optionName, | ||||||
| " expects a numeric value but got '", | ||||||
| value, | ||||||
| "'"); | ||||||
| } | ||||||
| } | ||||||
| } | ||||||
| } // namespace | ||||||
|
|
@@ -685,6 +697,30 @@ VideoEncoder::VideoEncoder( | |||||
| initializeEncoder(videoStreamOptions); | ||||||
| } | ||||||
|
|
||||||
| void VideoEncoder::sortCodecOptions( | ||||||
| const std::map<std::string, std::string>& codecOptions, | ||||||
| AVDictionary** codecDict, | ||||||
| AVDictionary** formatDict) { | ||||||
|
||||||
| // Search AVFormatContext's AVClass for options | ||||||
|
||||||
| const AVClass* formatClass = avformat_get_class(); | ||||||
| for (const auto& [key, value] : codecOptions) { | ||||||
| const AVOption* fmtOpt = av_opt_find2( | ||||||
| &formatClass, | ||||||
| key.c_str(), | ||||||
| nullptr, | ||||||
| 0, | ||||||
| AV_OPT_SEARCH_CHILDREN | AV_OPT_SEARCH_FAKE_OBJ, | ||||||
| nullptr); | ||||||
| if (fmtOpt) { | ||||||
| av_dict_set(formatDict, key.c_str(), value.c_str(), 0); | ||||||
| } else { | ||||||
| // Default to codec option (includes AVCodecContext + encoder-private) | ||||||
| // validateCodecOption(*avCodecContext_->codec, key.c_str(), value); | ||||||
|
||||||
| // validateCodecOption(*avCodecContext_->codec, key.c_str(), value); |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit, I think we can remove this comment, it doesn't add much value
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's rename these variables so that it's clearer what they refer to:
| videoStreamOptions.codecOptions.value(), &options, &formatOptions_); | |
| videoStreamOptions.codecOptions.value(), &avCodecOptions, &avFormatOptions_); |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's free it in the destructor rather than here, it's slightly less surprising.
This makes me realize: we should avoid calling av_dict_free and define a unique_ptr type with RAII semantics on AVDictionary, like we do for all the other FFmpeg types.
Let's open an issue to follow-up on that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Opened #1053 to follow up on this, thanks for the suggestion
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,9 +1,15 @@ | ||
| #pragma once | ||
| #include <torch/types.h> | ||
| #include <map> | ||
| #include <string> | ||
| #include "AVIOContextHolder.h" | ||
| #include "FFMPEGCommon.h" | ||
| #include "StreamOptions.h" | ||
|
|
||
| extern "C" { | ||
| #include <libavutil/dict.h> | ||
| } | ||
|
|
||
| namespace facebook::torchcodec { | ||
| class AudioEncoder { | ||
| public: | ||
|
|
@@ -154,6 +160,10 @@ class VideoEncoder { | |
|
|
||
| private: | ||
| void initializeEncoder(const VideoStreamOptions& videoStreamOptions); | ||
| void sortCodecOptions( | ||
| const std::map<std::string, std::string>& codecOptions, | ||
| AVDictionary** codecDict, | ||
| AVDictionary** formatDict); | ||
|
||
| UniqueAVFrame convertTensorToAVFrame( | ||
| const torch::Tensor& frame, | ||
| int frameIndex); | ||
|
|
@@ -179,6 +189,7 @@ class VideoEncoder { | |
| std::unique_ptr<AVIOContextHolder> avioContextHolder_; | ||
|
|
||
| bool encodeWasCalled_ = false; | ||
| AVDictionary* formatOptions_ = nullptr; | ||
| }; | ||
|
|
||
| } // namespace facebook::torchcodec | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -216,6 +216,7 @@ def encode_video_to_file_like( | |
| crf: Optional[Union[int, float]] = None, | ||
| pixel_format: Optional[str] = None, | ||
| preset: Optional[str] = None, | ||
| codec_options: Optional[list[str]] = None, | ||
|
||
| ) -> None: | ||
| """Encode video frames to a file-like object. | ||
|
|
||
|
|
@@ -227,6 +228,7 @@ def encode_video_to_file_like( | |
| crf: Optional constant rate factor for encoding quality | ||
| pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p") | ||
| preset: Optional encoder preset as string (e.g., "ultrafast", "medium") | ||
| codec_options: Optional list of codec options as flattened key-value pairs | ||
| """ | ||
| assert _pybind_ops is not None | ||
|
|
||
|
|
@@ -238,6 +240,7 @@ def encode_video_to_file_like( | |
| pixel_format, | ||
| crf, | ||
| preset, | ||
| codec_options, | ||
| ) | ||
|
|
||
|
|
||
|
|
@@ -326,8 +329,9 @@ def encode_video_to_file_abstract( | |
| frame_rate: int, | ||
| filename: str, | ||
| pixel_format: Optional[str] = None, | ||
| crf: Optional[Union[int, float]] = None, | ||
| preset: Optional[str] = None, | ||
| crf: Optional[Union[int, float]] = None, | ||
| codec_options: Optional[list[str]] = None, | ||
| ) -> None: | ||
| return | ||
|
|
||
|
|
@@ -338,8 +342,9 @@ def encode_video_to_tensor_abstract( | |
| frame_rate: int, | ||
| format: str, | ||
| pixel_format: Optional[str] = None, | ||
| crf: Optional[Union[int, float]] = None, | ||
| preset: Optional[str] = None, | ||
| crf: Optional[Union[int, float]] = None, | ||
| codec_options: Optional[list[str]] = None, | ||
| ) -> torch.Tensor: | ||
| return torch.empty([], dtype=torch.long) | ||
|
|
||
|
|
@@ -351,8 +356,9 @@ def _encode_video_to_file_like_abstract( | |
| format: str, | ||
| file_like_context: int, | ||
| pixel_format: Optional[str] = None, | ||
| crf: Optional[Union[int, float]] = None, | ||
| preset: Optional[str] = None, | ||
| crf: Optional[Union[int, float]] = None, | ||
| codec_options: Optional[list[str]] = None, | ||
| ) -> None: | ||
| return | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is OK for now. I suspect `min` and `max` may not always be set for all parameters, in which case we may error out when we shouldn't? We'll know if / when we get user reports about that. Let's keep it as-is for now and see if we need to revisit in the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We only access `min`/`max` on numeric parameters; my expectation (hope) is that they are populated for those.