Skip to content

Commit 682e7cb

Browse files
committed
Merge branch 'crf_encode_option' of https://github.com/Dan-Flores/torchcodec into preset_encode_option
2 parents a9d2a93 + b7e52fb commit 682e7cb

File tree

5 files changed

+86
-22
lines changed

5 files changed

+86
-22
lines changed

src/torchcodec/_core/Encoder.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "torch/types.h"
66

77
extern "C" {
8+
#include <libavutil/opt.h>
89
#include <libavutil/pixdesc.h>
910
}
1011

@@ -568,6 +569,43 @@ AVPixelFormat validatePixelFormat(
568569
}
569570
TORCH_CHECK(false, errorMsg.str());
570571
}
572+
573+
// Checks that `value` lies within the [min, max] range of the AVOption named
// `optionName` on the given codec, failing via TORCH_CHECK with a descriptive
// message otherwise. The check is skipped (the function simply returns) when
// the codec has no priv_class, when the option cannot be found, or when the
// option's type is not one of INT, INT64, FLOAT or DOUBLE.
void validateDoubleOption(
574+
const AVCodec& avCodec,
575+
const char* optionName,
576+
double value) {
577+
if (!avCodec.priv_class) {
578+
return;
579+
}
580+
const AVOption* option = av_opt_find2(
581+
// The obj argument must be a non-const void*, but we hold a
// const AVClass* const* (the address of the codec's priv_class pointer).
582+
// static_cast first converts it to const void*, then const_cast drops the
// const qualifier.
583+
const_cast<void*>(static_cast<const void*>(&avCodec.priv_class)),
584+
optionName,
585+
nullptr,
586+
0,
587+
AV_OPT_SEARCH_FAKE_OBJ,
588+
nullptr);
589+
// If the option was not found, let FFmpeg handle it later
590+
if (!option) {
591+
return;
592+
}
593+
// Only these numeric option types are range-checked; any other option type
// falls through without validation.
if (option->type == AV_OPT_TYPE_INT || option->type == AV_OPT_TYPE_INT64 ||
594+
option->type == AV_OPT_TYPE_FLOAT || option->type == AV_OPT_TYPE_DOUBLE) {
595+
TORCH_CHECK(
596+
value >= option->min && value <= option->max,
597+
optionName,
598+
"=",
599+
value,
600+
" is out of valid range [",
601+
option->min,
602+
", ",
603+
option->max,
604+
"] for this codec. For more details, run 'ffmpeg -h encoder=",
605+
avCodec.name,
606+
"'");
607+
}
608+
}
571609
} // namespace
572610

573611
VideoEncoder::~VideoEncoder() {
@@ -700,6 +738,7 @@ void VideoEncoder::initializeEncoder(
700738
// Apply videoStreamOptions
701739
AVDictionary* options = nullptr;
702740
if (videoStreamOptions.crf.has_value()) {
741+
validateDoubleOption(*avCodec, "crf", videoStreamOptions.crf.value());
703742
av_dict_set(
704743
&options,
705744
"crf",

src/torchcodec/_core/custom_ops.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ TORCH_LIBRARY(torchcodec_ns, m) {
3737
m.def(
3838
"_encode_audio_to_file_like(Tensor samples, int sample_rate, str format, int file_like_context, int? bit_rate=None, int? num_channels=None, int? desired_sample_rate=None) -> ()");
3939
m.def(
40-
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, int? crf=None, str? preset=None) -> ()");
40+
"encode_video_to_file(Tensor frames, int frame_rate, str filename, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
4141
m.def(
42-
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, int? crf=None, str? preset=None) -> Tensor");
42+
"encode_video_to_tensor(Tensor frames, int frame_rate, str format, str? pixel_format=None, float? crf=None, str? preset=None) -> Tensor");
4343
m.def(
44-
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, int? crf=None, str? preset=None) -> ()");
44+
"_encode_video_to_file_like(Tensor frames, int frame_rate, str format, int file_like_context, str? pixel_format=None, float? crf=None, str? preset=None) -> ()");
4545
m.def(
4646
"create_from_tensor(Tensor video_tensor, str? seek_mode=None) -> Tensor");
4747
m.def(
@@ -604,7 +604,7 @@ void encode_video_to_file(
604604
int64_t frame_rate,
605605
std::string_view file_name,
606606
std::optional<std::string_view> pixel_format = std::nullopt,
607-
std::optional<int64_t> crf = std::nullopt,
607+
std::optional<double> crf = std::nullopt,
608608
std::optional<std::string_view> preset = std::nullopt) {
609609
VideoStreamOptions videoStreamOptions;
610610
videoStreamOptions.pixelFormat = pixel_format;
@@ -623,7 +623,7 @@ at::Tensor encode_video_to_tensor(
623623
int64_t frame_rate,
624624
std::string_view format,
625625
std::optional<std::string_view> pixel_format = std::nullopt,
626-
std::optional<int64_t> crf = std::nullopt,
626+
std::optional<double> crf = std::nullopt,
627627
std::optional<std::string_view> preset = std::nullopt) {
628628
auto avioContextHolder = std::make_unique<AVIOToTensorContext>();
629629
VideoStreamOptions videoStreamOptions;
@@ -645,7 +645,7 @@ void _encode_video_to_file_like(
645645
std::string_view format,
646646
int64_t file_like_context,
647647
std::optional<std::string_view> pixel_format = std::nullopt,
648-
std::optional<int64_t> crf = std::nullopt,
648+
std::optional<double> crf = std::nullopt,
649649
std::optional<std::string_view> preset = std::nullopt) {
650650
auto fileLikeContext =
651651
reinterpret_cast<AVIOFileLikeContext*>(file_like_context);

src/torchcodec/_core/ops.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ def encode_video_to_file_like(
213213
frame_rate: int,
214214
format: str,
215215
file_like: Union[io.RawIOBase, io.BufferedIOBase],
216-
crf: Optional[int] = None,
216+
crf: Optional[Union[int, float]] = None,
217217
pixel_format: Optional[str] = None,
218218
preset: Optional[str] = None,
219219
) -> None:
@@ -326,7 +326,7 @@ def encode_video_to_file_abstract(
326326
frame_rate: int,
327327
filename: str,
328328
pixel_format: Optional[str] = None,
329-
crf: Optional[int] = None,
329+
crf: Optional[Union[int, float]] = None,
330330
preset: Optional[str] = None,
331331
) -> None:
332332
return
@@ -338,7 +338,7 @@ def encode_video_to_tensor_abstract(
338338
frame_rate: int,
339339
format: str,
340340
pixel_format: Optional[str] = None,
341-
crf: Optional[int] = None,
341+
crf: Optional[Union[int, float]] = None,
342342
preset: Optional[str] = None,
343343
) -> torch.Tensor:
344344
return torch.empty([], dtype=torch.long)
@@ -351,7 +351,7 @@ def _encode_video_to_file_like_abstract(
351351
format: str,
352352
file_like_context: int,
353353
pixel_format: Optional[str] = None,
354-
crf: Optional[int] = None,
354+
crf: Optional[Union[int, float]] = None,
355355
preset: Optional[str] = None,
356356
) -> None:
357357
return

src/torchcodec/encoders/_video_encoder.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def to_file(
3737
dest: Union[str, Path],
3838
*,
3939
pixel_format: Optional[str] = None,
40-
crf: Optional[int] = None,
40+
crf: Optional[Union[int, float]] = None,
4141
preset: Optional[Union[str, int]] = None,
4242
) -> None:
4343
"""Encode frames into a file.
@@ -48,7 +48,7 @@ def to_file(
4848
container format.
4949
pixel_format (str, optional): The pixel format for encoding (e.g.,
5050
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
51-
crf (int, optional): Constant Rate Factor for encoding quality. Lower values
51+
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
5252
mean better quality. Valid range depends on the encoder (commonly 0-51).
5353
Defaults to None (which will use encoder's default).
5454
preset (str or int, optional): Encoder option that controls the tradeoff between
@@ -71,7 +71,7 @@ def to_tensor(
7171
format: str,
7272
*,
7373
pixel_format: Optional[str] = None,
74-
crf: Optional[int] = None,
74+
crf: Optional[Union[int, float]] = None,
7575
preset: Optional[Union[str, int]] = None,
7676
) -> Tensor:
7777
"""Encode frames into raw bytes, as a 1D uint8 Tensor.
@@ -81,7 +81,7 @@ def to_tensor(
8181
"mkv", "avi", "webm", "flv", etc.
8282
pixel_format (str, optional): The pixel format to encode frames into (e.g.,
8383
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
84-
crf (int, optional): Constant Rate Factor for encoding quality. Lower values
84+
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
8585
mean better quality. Valid range depends on the encoder (commonly 0-51).
8686
Defaults to None (which will use encoder's default).
8787
preset (str or int, optional): Encoder option that controls the tradeoff between
@@ -109,7 +109,7 @@ def to_file_like(
109109
format: str,
110110
*,
111111
pixel_format: Optional[str] = None,
112-
crf: Optional[int] = None,
112+
crf: Optional[Union[int, float]] = None,
113113
preset: Optional[Union[str, int]] = None,
114114
) -> None:
115115
"""Encode frames into a file-like object.
@@ -124,7 +124,7 @@ def to_file_like(
124124
"mkv", "avi", "webm", "flv", etc.
125125
pixel_format (str, optional): The pixel format for encoding (e.g.,
126126
"yuv420p", "yuv444p"). If not specified, uses codec's default format.
127-
crf (int, optional): Constant Rate Factor for encoding quality. Lower values
127+
crf (int or float, optional): Constant Rate Factor for encoding quality. Lower values
128128
mean better quality. Valid range depends on the encoder (commonly 0-51).
129129
Defaults to None (which will use encoder's default).
130130
preset (str or int, optional): Encoder option that controls the tradeoff between

test/test_encoders.py

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -610,6 +610,37 @@ def test_bad_input_parameterized(self, tmp_path, method):
610610
)
611611
getattr(encoder, method)(**valid_params)
612612

613+
with pytest.raises(RuntimeError, match=r"crf=-10 is out of valid range"):
614+
encoder = VideoEncoder(
615+
frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),
616+
frame_rate=30,
617+
)
618+
getattr(encoder, method)(**valid_params, crf=-10)
619+
620+
with pytest.raises(
621+
RuntimeError,
622+
match=r"avcodec_open2 failed: Invalid argument",
623+
):
624+
encoder.to_tensor(format="mp4", preset="fake_preset")
625+
626+
# Smoke test: for each encoder method and each crf value (an int, a float and
# a negative fractional value), encoding 5 all-zero 64x64 frames at 30 fps must
# complete; the test makes no assertions beyond the call not raising.
@pytest.mark.parametrize("method", ["to_file", "to_tensor", "to_file_like"])
627+
@pytest.mark.parametrize("crf", [23, 23.5, -0.9])
628+
def test_crf_valid_values(self, method, crf, tmp_path):
629+
# Build the destination arguments that the selected method requires.
if method == "to_file":
630+
valid_params = {"dest": str(tmp_path / "test.mp4")}
631+
elif method == "to_tensor":
632+
valid_params = {"format": "mp4"}
633+
elif method == "to_file_like":
634+
valid_params = dict(file_like=io.BytesIO(), format="mp4")
635+
else:
636+
raise ValueError(f"Unknown method: {method}")
637+
638+
encoder = VideoEncoder(
639+
frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),
640+
frame_rate=30,
641+
)
642+
getattr(encoder, method)(**valid_params, crf=crf)
643+
613644
def test_bad_input(self):
614645
encoder = VideoEncoder(
615646
frames=torch.zeros((5, 3, 64, 64), dtype=torch.uint8),
@@ -634,12 +665,6 @@ def test_bad_input(self):
634665
):
635666
encoder.to_tensor(format="bad_format")
636667

637-
with pytest.raises(
638-
RuntimeError,
639-
match=r"avcodec_open2 failed: Invalid argument",
640-
):
641-
encoder.to_tensor(format="mp4", preset="fake_preset")
642-
643668
@pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
644669
def test_pixel_format_errors(self, method, tmp_path):
645670
frames = torch.zeros((5, 3, 64, 64), dtype=torch.uint8)

0 commit comments

Comments
 (0)