Skip to content

Commit e820cb0

Browse files
author
pytorchbot
committed
2025-10-11 nightly release (e5b2eef)
1 parent de4837e commit e820cb0

File tree

13 files changed

+219
-55
lines changed

13 files changed

+219
-55
lines changed

.github/workflows/linux_cuda_wheel.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ jobs:
7171
# but for releases we should add 12.8.
7272
cuda-version: ['12.6', '13.0']
7373
# TODO: put back ffmpeg 5 https://github.com/pytorch/torchcodec/issues/325
74-
ffmpeg-version-for-tests: ['4.4.2', '6', '7']
74+
ffmpeg-version-for-tests: ['4.4.2', '6', '7', '8.0']
7575

7676
container:
7777
image: "pytorch/manylinux2_28-builder:cuda${{ matrix.cuda-version }}"

.github/workflows/linux_wheel.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ jobs:
6363
fail-fast: false
6464
matrix:
6565
python-version: ['3.10']
66-
ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1']
66+
ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1', '8.0']
6767
needs: build
6868
steps:
6969
- uses: actions/download-artifact@v4

.github/workflows/macos_wheel.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ jobs:
6565
fail-fast: false
6666
matrix:
6767
python-version: ['3.10']
68-
ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1']
68+
ffmpeg-version-for-tests: ['4.4.2', '5.1.2', '6.1.1', '7.0.1', '8.0']
6969
needs: build
7070
steps:
7171
- name: Download wheel

.github/workflows/windows_wheel.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ jobs:
7171
# TODO: FFmpeg 5 on Windows segfaults in avcodec_open2() when passing
7272
# bad parameters.
7373
# See https://github.com/pytorch/torchcodec/pull/806
74+
# TODO: Support FFmpeg 8 on Windows
7475
ffmpeg-version-for-tests: ['4.4.2', '6.1.1', '7.0.1']
7576
needs: build
7677
steps:

benchmarks/decoders/gpu_benchmark.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,17 @@ def decode_full_video(video_path, decode_device_string, resize_device_string):
2929
num_threads = None
3030
if "cuda" in decode_device_string:
3131
num_threads = 1
32-
width = None
33-
height = None
32+
33+
resize_spec = ""
3434
if "native" in resize_device_string:
35-
width = RESIZED_WIDTH
36-
height = RESIZED_HEIGHT
35+
resize_spec = f"resize, {RESIZED_HEIGHT}, {RESIZED_WIDTH}"
36+
3737
torchcodec._core._add_video_stream(
3838
decoder,
3939
stream_index=-1,
4040
device=decode_device_string,
4141
num_threads=num_threads,
42-
width=width,
43-
height=height,
42+
transform_specs=resize_spec,
4443
)
4544

4645
start_time = time.time()

src/torchcodec/_core/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,11 +263,12 @@ if(DEFINED ENV{BUILD_AGAINST_ALL_FFMPEG_FROM_S3})
263263
you still need a different FFmpeg to be installed for run time!"
264264
)
265265

266-
# This will expose the ffmpeg4, ffmpeg5, ffmpeg6, and ffmpeg7 targets
266+
# This will expose the ffmpeg4, ffmpeg5, ffmpeg6, ffmpeg7, and ffmpeg8 targets
267267
include(
268268
${CMAKE_CURRENT_SOURCE_DIR}/fetch_and_expose_non_gpl_ffmpeg_libs.cmake
269269
)
270270

271+
make_torchcodec_libraries(8 ffmpeg8)
271272
make_torchcodec_libraries(7 ffmpeg7)
272273
make_torchcodec_libraries(6 ffmpeg6)
273274
make_torchcodec_libraries(4 ffmpeg4)

src/torchcodec/_core/custom_ops.cpp

Lines changed: 71 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,9 @@ TORCH_LIBRARY(torchcodec_ns, m) {
4343
m.def(
4444
"_create_from_file_like(int file_like_context, str? seek_mode=None) -> Tensor");
4545
m.def(
46-
"_add_video_stream(Tensor(a!) decoder, *, int? width=None, int? height=None, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"default\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None, str? color_conversion_library=None) -> ()");
46+
"_add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"default\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None, str? color_conversion_library=None) -> ()");
4747
m.def(
48-
"add_video_stream(Tensor(a!) decoder, *, int? width=None, int? height=None, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"default\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None) -> ()");
48+
"add_video_stream(Tensor(a!) decoder, *, int? num_threads=None, str? dimension_order=None, int? stream_index=None, str device=\"cpu\", str device_variant=\"default\", str transform_specs=\"\", (Tensor, Tensor, Tensor)? custom_frame_mappings=None) -> ()");
4949
m.def(
5050
"add_audio_stream(Tensor(a!) decoder, *, int? stream_index=None, int? sample_rate=None, int? num_channels=None) -> ()");
5151
m.def("seek_to_pts(Tensor(a!) decoder, float seconds) -> ()");
@@ -183,6 +183,69 @@ SingleStreamDecoder::SeekMode seekModeFromString(std::string_view seekMode) {
183183
}
184184
}
185185

186+
// Converts `str` to a strictly positive int.
//
// Leading whitespace is skipped by std::stoi and trailing whitespace is
// tolerated, so tokens taken from a spec such as "resize, 100, 200" parse as
// expected. Anything else after the number (e.g. "12abc" or "1.5") is
// rejected rather than being silently truncated to the numeric prefix.
//
// Fails via TORCH_CHECK when:
//   - `str` does not start with a number, or has trailing non-whitespace
//     characters after the number;
//   - the value does not fit in an int;
//   - the value is zero or negative.
int checkedToPositiveInt(const std::string& str) {
  int ret = 0;
  std::string::size_type numParsed = 0;
  try {
    ret = std::stoi(str, &numParsed);
  } catch (const std::invalid_argument&) {
    TORCH_CHECK(false, "String cannot be converted to an int:" + str);
  } catch (const std::out_of_range&) {
    TORCH_CHECK(false, "String would become integer out of range:" + str);
  }
  // std::stoi stops at the first character that cannot be part of the number
  // and reports how many characters it consumed; make sure only whitespace
  // is left over so that malformed input is not accepted.
  TORCH_CHECK(
      str.find_first_not_of(" \t\n\r\f\v", numParsed) == std::string::npos,
      "String cannot be converted to an int:" + str);
  TORCH_CHECK(ret > 0, "String must be a positive integer:" + str);
  return ret;
}
198+
199+
// Resize transform specs take the form:
200+
//
201+
// "resize, <height>, <width>"
202+
//
203+
// Where "resize" is the string literal and <height> and <width> are positive
204+
// integers.
205+
Transform* makeResizeTransform(
206+
const std::vector<std::string>& resizeTransformSpec) {
207+
TORCH_CHECK(
208+
resizeTransformSpec.size() == 3,
209+
"resizeTransformSpec must have 3 elements including its name");
210+
int height = checkedToPositiveInt(resizeTransformSpec[1]);
211+
int width = checkedToPositiveInt(resizeTransformSpec[2]);
212+
return new ResizeTransform(FrameDims(height, width));
213+
}
214+
215+
// Splits `str` on `delimiter` and returns the tokens in order, with leading
// and trailing whitespace removed from each token.
//
// Trimming matters because transform specs are written with a space after
// each separator ("resize, 10, 20; resize, 30, 40"); without it, every token
// after the first would start with a space and would not compare equal to a
// transform name.
//
// Edge cases (inherited from std::getline):
//   - an empty `str` yields no tokens;
//   - a single trailing delimiter does not produce a trailing empty token;
//   - adjacent delimiters produce an empty token between them.
std::vector<std::string> split(const std::string& str, char delimiter) {
  static constexpr const char* kWhitespace = " \t\n\r\f\v";

  std::vector<std::string> tokens;
  std::string token;
  std::istringstream tokenStream(str);
  while (std::getline(tokenStream, token, delimiter)) {
    const std::string::size_type first = token.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
      // Token is empty or whitespace-only.
      tokens.emplace_back();
      continue;
    }
    const std::string::size_type last = token.find_last_not_of(kWhitespace);
    tokens.push_back(token.substr(first, last - first + 1));
  }
  return tokens;
}
224+
225+
// The transformSpecsRaw string is always in the format:
226+
//
227+
// "name1, param1, param2, ...; name2, param1, param2, ...; ..."
228+
//
229+
// Where "nameX" is the name of the transform, and "paramX" are the parameters.
230+
std::vector<Transform*> makeTransforms(const std::string& transformSpecsRaw) {
231+
std::vector<Transform*> transforms;
232+
std::vector<std::string> transformSpecs = split(transformSpecsRaw, ';');
233+
for (const std::string& transformSpecRaw : transformSpecs) {
234+
std::vector<std::string> transformSpec = split(transformSpecRaw, ',');
235+
TORCH_CHECK(
236+
transformSpec.size() >= 1,
237+
"Invalid transform spec: " + transformSpecRaw);
238+
239+
auto name = transformSpec[0];
240+
if (name == "resize") {
241+
transforms.push_back(makeResizeTransform(transformSpec));
242+
} else {
243+
TORCH_CHECK(false, "Invalid transform name: " + name);
244+
}
245+
}
246+
return transforms;
247+
}
248+
186249
} // namespace
187250

188251
// ==============================
@@ -252,36 +315,18 @@ at::Tensor _create_from_file_like(
252315

253316
void _add_video_stream(
254317
at::Tensor& decoder,
255-
std::optional<int64_t> width = std::nullopt,
256-
std::optional<int64_t> height = std::nullopt,
257318
std::optional<int64_t> num_threads = std::nullopt,
258319
std::optional<std::string_view> dimension_order = std::nullopt,
259320
std::optional<int64_t> stream_index = std::nullopt,
260321
std::string_view device = "cpu",
261322
std::string_view device_variant = "default",
323+
std::string_view transform_specs = "",
262324
std::optional<std::tuple<at::Tensor, at::Tensor, at::Tensor>>
263325
custom_frame_mappings = std::nullopt,
264326
std::optional<std::string_view> color_conversion_library = std::nullopt) {
265327
VideoStreamOptions videoStreamOptions;
266328
videoStreamOptions.ffmpegThreadCount = num_threads;
267329

268-
// TODO: Eliminate this temporary bridge code. This exists because we have
269-
// not yet exposed the transforms API on the Python side. We also want
270-
// to remove the `width` and `height` arguments from the Python API.
271-
//
272-
// TEMPORARY BRIDGE CODE START
273-
TORCH_CHECK(
274-
width.has_value() == height.has_value(),
275-
"width and height must both be set or unset.");
276-
std::vector<Transform*> transforms;
277-
if (width.has_value()) {
278-
transforms.push_back(
279-
new ResizeTransform(FrameDims(height.value(), width.value())));
280-
width.reset();
281-
height.reset();
282-
}
283-
// TEMPORARY BRIDGE CODE END
284-
285330
if (dimension_order.has_value()) {
286331
std::string stdDimensionOrder{dimension_order.value()};
287332
TORCH_CHECK(stdDimensionOrder == "NHWC" || stdDimensionOrder == "NCHW");
@@ -309,6 +354,9 @@ void _add_video_stream(
309354
videoStreamOptions.device = torch::Device(std::string(device));
310355
videoStreamOptions.deviceVariant = device_variant;
311356

357+
std::vector<Transform*> transforms =
358+
makeTransforms(std::string(transform_specs));
359+
312360
std::optional<SingleStreamDecoder::FrameMappings> converted_mappings =
313361
custom_frame_mappings.has_value()
314362
? std::make_optional(makeFrameMappings(custom_frame_mappings.value()))
@@ -324,24 +372,22 @@ void _add_video_stream(
324372
// Add a new video stream at `stream_index` using the provided options.
325373
void add_video_stream(
326374
at::Tensor& decoder,
327-
std::optional<int64_t> width = std::nullopt,
328-
std::optional<int64_t> height = std::nullopt,
329375
std::optional<int64_t> num_threads = std::nullopt,
330376
std::optional<std::string_view> dimension_order = std::nullopt,
331377
std::optional<int64_t> stream_index = std::nullopt,
332378
std::string_view device = "cpu",
333379
std::string_view device_variant = "default",
380+
std::string_view transform_specs = "",
334381
const std::optional<std::tuple<at::Tensor, at::Tensor, at::Tensor>>&
335382
custom_frame_mappings = std::nullopt) {
336383
_add_video_stream(
337384
decoder,
338-
width,
339-
height,
340385
num_threads,
341386
dimension_order,
342387
stream_index,
343388
device,
344389
device_variant,
390+
transform_specs,
345391
custom_frame_mappings);
346392
}
347393

src/torchcodec/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ if (LINUX)
4444
f7_sha256
4545
1cb946d8b7c6393c2c3ebe1f900b8de7a2885fe614c45d4ec32c9833084f2f26
4646
)
47+
set(
48+
f8_sha256
49+
c55b3c1a4b5e4d5fdd7c632bea3ab6f45b4e37cc8e0999dda3f84a8ed8defad8
50+
)
4751
set(
4852
f4_library_file_names
4953
libavutil.so.56
@@ -84,6 +88,16 @@ if (LINUX)
8488
libswscale.so.8
8589
libswresample.so.5
8690
)
91+
set(
92+
f8_library_file_names
93+
libavutil.so.60
94+
libavcodec.so.62
95+
libavformat.so.62
96+
libavdevice.so.62
97+
libavfilter.so.11
98+
libswscale.so.9
99+
libswresample.so.6
100+
)
87101
elseif (APPLE)
88102
set(lib_dir "lib")
89103
set(
@@ -106,6 +120,10 @@ elseif (APPLE)
106120
f7_sha256
107121
48a4fc8ce098305cfd4a58f40889249c523ca3c285f66ba704b5bad0e3ada53a
108122
)
123+
set(
124+
f8_sha256
125+
beb936b76f25d2621228a12cdb67c9ae3d1eff7aa713ef8d1167ebf0c25bd5ec
126+
)
109127

110128
set(
111129
f4_library_file_names
@@ -147,6 +165,16 @@ elseif (APPLE)
147165
libswscale.8.dylib
148166
libswresample.5.dylib
149167
)
168+
set(
169+
f8_library_file_names
170+
libavutil.60.dylib
171+
libavcodec.62.dylib
172+
libavformat.62.dylib
173+
libavdevice.62.dylib
174+
libavfilter.11.dylib
175+
libswscale.9.dylib
176+
libswresample.6.dylib
177+
)
150178

151179
elseif (WIN32)
152180
set(lib_dir "bin")
@@ -170,6 +198,10 @@ elseif (WIN32)
170198
f7_sha256
171199
ae391ace382330e912793b70b68529ee7c91026d2869b4df7e7c3e7d3656bdd5
172200
)
201+
set(
202+
f8_sha256
203+
bac845ac79876b104959cb0e7b9dec772a261116344dd17d2f97e7ddfac4a73f
204+
)
173205

174206
set(
175207
f4_library_file_names
@@ -211,6 +243,16 @@ elseif (WIN32)
211243
swscale.lib
212244
swresample.lib
213245
)
246+
set(
247+
f8_library_file_names
248+
avutil.lib
249+
avcodec.lib
250+
avformat.lib
251+
avdevice.lib
252+
avfilter.lib
253+
swscale.lib
254+
swresample.lib
255+
)
214256
else()
215257
message(
216258
FATAL_ERROR
@@ -242,19 +284,27 @@ FetchContent_Declare(
242284
URL_HASH
243285
SHA256=${f7_sha256}
244286
)
287+
FetchContent_Declare(
288+
f8
289+
URL ${platform_url}/8.0.tar.gz
290+
URL_HASH
291+
SHA256=${f8_sha256}
292+
)
245293

246-
FetchContent_MakeAvailable(f4 f5 f6 f7)
294+
FetchContent_MakeAvailable(f4 f5 f6 f7 f8)
247295

248296
add_library(ffmpeg4 INTERFACE)
249297
add_library(ffmpeg5 INTERFACE)
250298
add_library(ffmpeg6 INTERFACE)
251299
add_library(ffmpeg7 INTERFACE)
300+
add_library(ffmpeg8 INTERFACE)
252301

253302
# Note: the f?_SOURCE_DIR variables were set by FetchContent_MakeAvailable
254303
target_include_directories(ffmpeg4 INTERFACE ${f4_SOURCE_DIR}/include)
255304
target_include_directories(ffmpeg5 INTERFACE ${f5_SOURCE_DIR}/include)
256305
target_include_directories(ffmpeg6 INTERFACE ${f6_SOURCE_DIR}/include)
257306
target_include_directories(ffmpeg7 INTERFACE ${f7_SOURCE_DIR}/include)
307+
target_include_directories(ffmpeg8 INTERFACE ${f8_SOURCE_DIR}/include)
258308

259309

260310
list(
@@ -277,6 +327,11 @@ list(
277327
PREPEND ${f7_SOURCE_DIR}/${lib_dir}/
278328
OUTPUT_VARIABLE f7_library_paths
279329
)
330+
list(
331+
TRANSFORM f8_library_file_names
332+
PREPEND ${f8_SOURCE_DIR}/${lib_dir}/
333+
OUTPUT_VARIABLE f8_library_paths
334+
)
280335

281336
target_link_libraries(
282337
ffmpeg4
@@ -298,3 +353,8 @@ target_link_libraries(
298353
INTERFACE
299354
${f7_library_paths}
300355
)
356+
target_link_libraries(
357+
ffmpeg8
358+
INTERFACE
359+
${f8_library_paths}
360+
)

src/torchcodec/_core/ops.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -299,13 +299,12 @@ def create_from_tensor_abstract(
299299
def _add_video_stream_abstract(
300300
decoder: torch.Tensor,
301301
*,
302-
width: Optional[int] = None,
303-
height: Optional[int] = None,
304302
num_threads: Optional[int] = None,
305303
dimension_order: Optional[str] = None,
306304
stream_index: Optional[int] = None,
307305
device: str = "cpu",
308306
device_variant: str = "default",
307+
transform_specs: str = "",
309308
custom_frame_mappings: Optional[
310309
tuple[torch.Tensor, torch.Tensor, torch.Tensor]
311310
] = None,
@@ -318,13 +317,12 @@ def _add_video_stream_abstract(
318317
def add_video_stream_abstract(
319318
decoder: torch.Tensor,
320319
*,
321-
width: Optional[int] = None,
322-
height: Optional[int] = None,
323320
num_threads: Optional[int] = None,
324321
dimension_order: Optional[str] = None,
325322
stream_index: Optional[int] = None,
326323
device: str = "cpu",
327324
device_variant: str = "default",
325+
transform_specs: str = "",
328326
custom_frame_mappings: Optional[
329327
tuple[torch.Tensor, torch.Tensor, torch.Tensor]
330328
] = None,

0 commit comments

Comments
 (0)