Skip to content

Commit 2a49a3d

Browse files
committed
add preset option, update ffmpeg_cli test
1 parent 222e74d commit 2a49a3d

File tree

6 files changed

+82
-26
lines changed

6 files changed

+82
-26
lines changed

src/torchcodec/_core/Encoder.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,10 @@ void VideoEncoder::initializeEncoder(
706706
std::to_string(videoStreamOptions.crf.value()).c_str(),
707707
0);
708708
}
709+
if (videoStreamOptions.preset.has_value()) {
710+
av_dict_set(
711+
&options, "preset", videoStreamOptions.preset.value().c_str(), 0);
712+
}
709713
int status = avcodec_open2(avCodecContext_.get(), avCodec, &options);
710714
av_dict_free(&options);
711715

src/torchcodec/_core/StreamOptions.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,11 @@ struct VideoStreamOptions {
4545
std::string_view deviceVariant = "ffmpeg";
4646

4747
// Encoding options
48-
// TODO-VideoEncoder: Consider adding other optional fields here
49-
// (bit rate, gop size, max b frames, preset)
50-
std::optional<int> crf;
51-
5248
// Optional pixel format for video encoding (e.g., "yuv420p", "yuv444p")
5349
// If not specified, uses codec's default format.
5450
std::optional<std::string> pixelFormat;
51+
std::optional<int> crf;
52+
std::optional<std::string> preset;
5553
};
5654

5755
struct AudioStreamOptions {

src/torchcodec/_core/custom_ops.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
3737
m.def(
3838
"_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
3939
m.def(
40-
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, int? crf=None) -> ()");
40+
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, int? crf=None, str? preset=None) -> ()");
4141
m.def(
42-
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, int? crf=None) -> Tensor");
42+
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, int? crf=None, str? preset=None) -> Tensor");
4343
m.def(
44-
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, int? crf=None) -> ()");
44+
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, int? crf=None, str? preset=None) -> ()");
4545
m.def(
4646
"create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
4747
m.def(
@@ -603,11 +603,13 @@ void encode_video_to_file(
603603
const at::Tensor& frames,
604604
int64_t frame_rate,
605605
std::string_view file_name,
606-
std::optional<std::string> pixel_format = std::nullopt,
607-
std::optional<int64_t> crf = std::nullopt) {
606+
std::optional<std::string_view> pixel_format = std::nullopt,
607+
std::optional<int64_t> crf = std::nullopt,
608+
std::optional<std::string_view> preset = std::nullopt) {
608609
VideoStreamOptions videoStreamOptions;
609610
videoStreamOptions.pixelFormat = pixel_format;
610611
videoStreamOptions.crf = crf;
612+
videoStreamOptions.preset = preset;
611613
VideoEncoder(
612614
frames,
613615
validateInt64ToInt(frame_rate, "frame_rate"),
@@ -620,12 +622,14 @@ at::Tensor encode_video_to_tensor(
620622
const at::Tensor& frames,
621623
int64_t frame_rate,
622624
std::string_view format,
623-
std::optional<std::string> pixel_format = std::nullopt,
624-
std::optional<int64_t> crf = std::nullopt) {
625+
std::optional<std::string_view> pixel_format = std::nullopt,
626+
std::optional<int64_t> crf = std::nullopt,
627+
std::optional<std::string_view> preset = std::nullopt) {
625628
auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
626629
VideoStreamOptions videoStreamOptions;
627630
videoStreamOptions.pixelFormat = pixel_format;
628631
videoStreamOptions.crf = crf;
632+
videoStreamOptions.preset = preset;
629633
return VideoEncoder(
630634
frames,
631635
validateInt64ToInt(frame_rate, "frame_rate"),
@@ -640,8 +644,9 @@ void _encode_video_to_file_like(
640644
int64_t frame_rate,
641645
std::string_view format,
642646
int64_t file_like_context,
643-
std::optional<std::string> pixel_format = std::nullopt,
644-
std::optional<int64_t> crf = std::nullopt) {
647+
std::optional<std::string_view> pixel_format = std::nullopt,
648+
std::optional<int64_t> crf = std::nullopt,
649+
std::optional<std::string_view> preset = std::nullopt) {
645650
auto fileLikeContext =
646651
reinterpret_cast<AVIOFileLikeContext*>(file_like_context);
647652
TORCH_CHECK(
@@ -651,6 +656,7 @@ void _encode_video_to_file_like(
651656
VideoStreamOptions videoStreamOptions;
652657
videoStreamOptions.pixelFormat = pixel_format;
653658
videoStreamOptions.crf = crf;
659+
videoStreamOptions.preset = preset;
654660

655661
VideoEncoder encoder(
656662
frames,

src/torchcodec/_core/ops.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ def encode_video_to_file_like(
215215
file_like: Union[io.RawIOBase, io.BufferedIOBase],
216216
crf: Optional[int] = None,
217217
pixel_format: Optional[str] = None,
218+
preset: Optional[str] = None,
218219
) -> None:
219220
"""Encode video frames to a file-like object.
220221
@@ -225,6 +226,7 @@ def encode_video_to_file_like(
225226
file_like: File-like object that supports write() and seek() methods
226227
crf: Optional constant rate factor for encoding quality
227228
pixel_format: Optional pixel format (e.g., "yuv420p", "yuv444p")
229+
preset: Optional encoder preset as string (e.g., "ultrafast", "medium")
228230
"""
229231
assert _pybind_ops is not None
230232

@@ -235,6 +237,7 @@ def encode_video_to_file_like(
235237
_pybind_ops.create_file_like_context(file_like, True), # True means for writing
236238
pixel_format,
237239
crf,
240+
preset,
238241
)
239242

240243

@@ -322,8 +325,9 @@ def encode_video_to_file_abstract(
322325
frames: torch.Tensor,
323326
frame_rate: int,
324327
filename: str,
325-
crf: Optional[int] = None,
326328
pixel_format: Optional[str] = None,
329+
crf: Optional[int] = None,
330+
preset: Optional[str] = None,
327331
) -> None:
328332
return
329333

@@ -333,8 +337,9 @@ def encode_video_to_tensor_abstract(
333337
frames: torch.Tensor,
334338
frame_rate: int,
335339
format: str,
336-
crf: Optional[int] = None,
337340
pixel_format: Optional[str] = None,
341+
crf: Optional[int] = None,
342+
preset: Optional[str] = None,
338343
) -> torch.Tensor:
339344
return torch.empty([], dtype=torch.long)
340345

@@ -345,8 +350,9 @@ def _encode_video_to_file_like_abstract(
345350
frame_rate: int,
346351
format: str,
347352
file_like_context: int,
348-
crf: Optional[int] = None,
349353
pixel_format: Optional[str] = None,
354+
crf: Optional[int] = None,
355+
preset: Optional[str] = None,
350356
) -> None:
351357
return
352358

src/torchcodec/encoders/_video_encoder.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ def to_file(
3838
*,
3939
pixel_format: Optional[str] = None,
4040
crf: Optional[int] = None,
41+
preset: Optional[Union[str, int]] = None,
4142
) -> None:
4243
"""Encode frames into a file.
4344
@@ -50,13 +51,19 @@ def to_file(
5051
crf (int, optional): Constant Rate Factor for encoding quality. Lower values
5152
mean better quality. Valid range depends on the encoder (commonly 0-51).
5253
Defaults to None (which will use encoder's default).
54+
preset (str or int, optional): Encoder option that controls the tradeoff between
55+
encoding speed and compression. Valid values depend on the encoder (commonly
56+
a string: "fast", "medium", "slow"). Defaults to None
57+
(which will use encoder's default).
5358
"""
59+
preset = str(preset) if isinstance(preset, int) else preset
5460
_core.encode_video_to_file(
5561
frames=self._frames,
5662
frame_rate=self._frame_rate,
5763
filename=str(dest),
5864
pixel_format=pixel_format,
5965
crf=crf,
66+
preset=preset,
6067
)
6168

6269
def to_tensor(
@@ -65,6 +72,7 @@ def to_tensor(
6572
*,
6673
pixel_format: Optional[str] = None,
6774
crf: Optional[int] = None,
75+
preset: Optional[Union[str, int]] = None,
6876
) -> Tensor:
6977
"""Encode frames into raw bytes, as a 1D uint8 Tensor.
7078
@@ -76,16 +84,23 @@ def to_tensor(
7684
crf (int, optional): Constant Rate Factor for encoding quality. Lower values
7785
mean better quality. Valid range depends on the encoder (commonly 0-51).
7886
Defaults to None (which will use encoder's default).
87+
preset (str or int, optional): Encoder option that controls the tradeoff between
88+
encoding speed and compression. Valid values depend on the encoder (commonly
89+
a string: "fast", "medium", "slow"). Defaults to None
90+
(which will use encoder's default).
7991
8092
Returns:
8193
Tensor: The raw encoded bytes as 1D uint8 Tensor.
8294
"""
95+
# Convert preset to string if it's an int
96+
preset_value = str(preset) if isinstance(preset, int) else preset
8397
return _core.encode_video_to_tensor(
8498
frames=self._frames,
8599
frame_rate=self._frame_rate,
86100
format=format,
87101
pixel_format=pixel_format,
88102
crf=crf,
103+
preset=preset_value,
89104
)
90105

91106
def to_file_like(
@@ -95,6 +110,7 @@ def to_file_like(
95110
*,
96111
pixel_format: Optional[str] = None,
97112
crf: Optional[int] = None,
113+
preset: Optional[Union[str, int]] = None,
98114
) -> None:
99115
"""Encode frames into a file-like object.
100116
@@ -111,12 +127,18 @@ def to_file_like(
111127
crf (int, optional): Constant Rate Factor for encoding quality. Lower values
112128
mean better quality. Valid range depends on the encoder (commonly 0-51).
113129
Defaults to None (which will use encoder's default).
130+
preset (str or int, optional): Encoder option that controls the tradeoff between
131+
encoding speed and compression. Valid values depend on the encoder (commonly
132+
a string: "fast", "medium", "slow"). Defaults to None
133+
(which will use encoder's default).
114134
"""
135+
preset = str(preset) if isinstance(preset, int) else preset
115136
_core.encode_video_to_file_like(
116137
frames=self._frames,
117138
frame_rate=self._frame_rate,
118139
format=format,
119140
file_like=file_like,
120141
pixel_format=pixel_format,
121142
crf=crf,
143+
preset=preset,
122144
)

test/test_encoders.py

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -801,13 +801,26 @@ def test_against_to_file(self, tmp_path, format, method):
801801
pytest.param("webm", marks=pytest.mark.slow),
802802
),
803803
)
804-
@pytest.mark.parametrize("pixel_format", ("yuv444p", "yuv420p"))
805-
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format):
804+
@pytest.mark.parametrize(
805+
"encode_params",
806+
[
807+
{"pixel_format": "yuv444p", "crf": 0, "preset": None},
808+
{"pixel_format": "yuv420p", "crf": 30, "preset": None},
809+
{"pixel_format": "yuv420p", "crf": None, "preset": "ultrafast"},
810+
{"pixel_format": "yuv420p", "crf": None, "preset": None},
811+
],
812+
)
813+
def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, encode_params):
806814
ffmpeg_version = get_ffmpeg_major_version()
807815
if format == "webm" and (
808816
ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7))
809817
):
810818
pytest.skip("Codec for webm is not available in this FFmpeg installation.")
819+
820+
pixel_format = encode_params["pixel_format"]
821+
crf = encode_params["crf"]
822+
preset = encode_params["preset"]
823+
811824
if format in ("avi", "flv") and pixel_format == "yuv444p":
812825
pytest.skip(f"Default codec for {format} does not support {pixel_format}")
813826

@@ -820,8 +833,7 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format):
820833

821834
ffmpeg_encoded_path = str(tmp_path / f"ffmpeg_output.{format}")
822835
frame_rate = 30
823-
crf = 0
824-
# Some codecs (ex. MPEG4) do not support CRF.
836+
# Some codecs (ex. MPEG4) do not support CRF or preset.
825837
# Flags not supported by the selected codec will be ignored.
826838
ffmpeg_cmd = [
827839
"ffmpeg",
@@ -836,18 +848,26 @@ def test_video_encoder_against_ffmpeg_cli(self, tmp_path, format, pixel_format):
836848
str(frame_rate),
837849
"-i",
838850
temp_raw_path,
839-
"-pix_fmt",
840-
pixel_format, # Output format
841-
"-crf",
842-
str(crf),
843-
ffmpeg_encoded_path,
844851
]
852+
if pixel_format is not None: # Output format
853+
ffmpeg_cmd.extend(["-pix_fmt", pixel_format])
854+
if preset is not None:
855+
ffmpeg_cmd.extend(["-preset", preset])
856+
if crf is not None:
857+
ffmpeg_cmd.extend(["-crf", str(crf)])
858+
# Output path must be last
859+
ffmpeg_cmd.append(ffmpeg_encoded_path)
845860
subprocess.run(ffmpeg_cmd, check=True)
846861

847862
# Encode with our video encoder
848863
encoder_output_path = str(tmp_path / f"encoder_output.{format}")
849864
encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate)
850-
encoder.to_file(dest=encoder_output_path, pixel_format=pixel_format, crf=crf)
865+
encoder.to_file(
866+
dest=encoder_output_path,
867+
pixel_format=pixel_format,
868+
crf=crf,
869+
preset=preset,
870+
)
851871

852872
ffmpeg_frames = self.decode(ffmpeg_encoded_path).data
853873
encoder_frames = self.decode(encoder_output_path).data

0 commit comments

Comments
 (0)