Skip to content

Commit 12c5cc1

Browse files
author
pytorchbot
committed
2025-03-15 nightly release (8e611bb)
1 parent 30ede61 commit 12c5cc1

File tree

11 files changed

+339
-21
lines changed

11 files changed

+339
-21
lines changed

packaging/build_ffmpeg.sh

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,8 @@ tar -xf ffmpeg.tar.gz --strip-components 1
6161
--enable-avfilter \
6262
--enable-avformat \
6363
--enable-avutil \
64-
--enable-swscale
64+
--enable-swscale \
65+
--enable-swresample
6566

6667
make -j install
6768
ls ${prefix}/*
@@ -78,27 +79,31 @@ if [[ "$(uname)" == Darwin ]]; then
7879
avdevice=libavdevice.58
7980
avfilter=libavfilter.7
8081
swscale=libswscale.5
82+
swresample=libswresample.3
8183
elif [[ ${major_ver} == 5 ]]; then
8284
avutil=libavutil.57
8385
avcodec=libavcodec.59
8486
avformat=libavformat.59
8587
avdevice=libavdevice.59
8688
avfilter=libavfilter.8
8789
swscale=libswscale.6
90+
swresample=libswresample.4
8891
elif [[ ${major_ver} == 6 ]]; then
8992
avutil=libavutil.58
9093
avcodec=libavcodec.60
9194
avformat=libavformat.60
9295
avdevice=libavdevice.60
9396
avfilter=libavfilter.9
9497
swscale=libswscale.7
98+
swresample=libswresample.4
9599
elif [[ ${major_ver} == 7 ]]; then
96100
avutil=libavutil.59
97101
avcodec=libavcodec.61
98102
avformat=libavformat.61
99103
avdevice=libavdevice.61
100104
avfilter=libavfilter.10
101105
swscale=libswscale.8
106+
swresample=libswresample.5
102107
else
103108
printf "Error: unexpected FFmpeg major version: %s\n" ${major_ver}
104109
exit 1;
@@ -120,7 +125,7 @@ if [[ "$(uname)" == Darwin ]]; then
120125
fi
121126

122127
# list up the paths to fix
123-
for lib in ${avcodec} ${avdevice} ${avfilter} ${avformat} ${avutil} ${swscale}; do
128+
for lib in ${avcodec} ${avdevice} ${avfilter} ${avformat} ${avutil} ${swscale} ${swresample}; do
124129
${otool} -l ${prefix}/lib/${lib}.dylib | grep -B2 ${prefix}
125130
done
126131

@@ -155,6 +160,13 @@ if [[ "$(uname)" == Darwin ]]; then
155160
${prefix}/lib/${swscale}.dylib
156161
${otool} -l ${prefix}/lib/${swscale}.dylib | grep -B2 ${prefix}
157162

163+
${install_name_tool} \
164+
-change ${prefix}/lib/${avutil}.dylib @rpath/${avutil}.dylib \
165+
-delete_rpath ${prefix}/lib \
166+
-id @rpath/${swresample}.dylib \
167+
${prefix}/lib/${swresample}.dylib
168+
${otool} -l ${prefix}/lib/${swresample}.dylib | grep -B2 ${prefix}
169+
158170
${install_name_tool} \
159171
-change ${prefix}/lib/${avcodec}.dylib @rpath/${avcodec}.dylib \
160172
-change ${prefix}/lib/${avutil}.dylib @rpath/${avutil}.dylib \

src/torchcodec/decoders/_core/VideoDecoder.cpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,9 @@ void VideoDecoder::initializeDecoder() {
170170
}
171171
containerMetadata_.numVideoStreams++;
172172
} else if (avStream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
173+
AVSampleFormat format =
174+
static_cast<AVSampleFormat>(avStream->codecpar->format);
175+
streamMetadata.sampleFormat = av_get_sample_fmt_name(format);
173176
containerMetadata_.numAudioStreams++;
174177
}
175178

src/torchcodec/decoders/_core/VideoDecoder.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -81,6 +81,7 @@ class VideoDecoder {
8181
// Audio-only fields
8282
std::optional<int64_t> sampleRate;
8383
std::optional<int64_t> numChannels;
84+
std::optional<std::string> sampleFormat;
8485
};
8586

8687
struct ContainerMetadata {

src/torchcodec/decoders/_core/VideoDecoderOps.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -495,12 +495,15 @@ std::string get_stream_json_metadata(
495495
if (streamMetadata.numChannels.has_value()) {
496496
map["numChannels"] = std::to_string(*streamMetadata.numChannels);
497497
}
498+
if (streamMetadata.sampleFormat.has_value()) {
499+
map["sampleFormat"] = quoteValue(streamMetadata.sampleFormat.value());
500+
}
498501
if (streamMetadata.mediaType == AVMEDIA_TYPE_VIDEO) {
499-
map["mediaType"] = "\"video\"";
502+
map["mediaType"] = quoteValue("video");
500503
} else if (streamMetadata.mediaType == AVMEDIA_TYPE_AUDIO) {
501-
map["mediaType"] = "\"audio\"";
504+
map["mediaType"] = quoteValue("audio");
502505
} else {
503-
map["mediaType"] = "\"other\"";
506+
map["mediaType"] = quoteValue("other");
504507
}
505508
return mapToJson(map);
506509
}

src/torchcodec/decoders/_core/_metadata.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -161,9 +161,9 @@ def __repr__(self):
161161
class AudioStreamMetadata(StreamMetadata):
162162
"""Metadata of a single audio stream."""
163163

164-
# TODO-AUDIO Add sample format field
165164
sample_rate: Optional[int]
166165
num_channels: Optional[int]
166+
sample_format: Optional[str]
167167

168168
def __repr__(self):
169169
return super().__repr__()
@@ -240,6 +240,7 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
240240
AudioStreamMetadata(
241241
sample_rate=stream_dict.get("sampleRate"),
242242
num_channels=stream_dict.get("numChannels"),
243+
sample_format=stream_dict.get("sampleFormat"),
243244
**common_meta,
244245
)
245246
)

src/torchcodec/decoders/_core/fetch_and_expose_non_gpl_ffmpeg_libs.cmake

Lines changed: 17 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ include(FetchContent)
1111

1212
set(
1313
base_url
14-
https://pytorch.s3.amazonaws.com/torchcodec/ffmpeg/2024-09-23
14+
https://pytorch.s3.amazonaws.com/torchcodec/ffmpeg/2025-03-14
1515
)
1616

1717
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
@@ -22,19 +22,19 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
2222

2323
set(
2424
f4_sha256
25-
c0311e983c426a7f5abcffc3056f0d64a93bfcb69a9db8e40b81d5c976d84952
25+
1a083f1922443bedb5243d04896383b8c606778a7ddb9d886c8303e55339fe0c
2626
)
2727
set(
2828
f5_sha256
29-
9a48dbe7912a0c3dbbac0c906a840754caf147d37dad2f1b3906de7441e1483a
29+
65d6ad54082d94dcb3f801d73df2265e0e1bb303c7afbce7723e3b77ccd0e207
3030
)
3131
set(
3232
f6_sha256
33-
337cd2ce671a69737e246c73bf69e2c36732d89b7d2c37eefaca8601cad272ca
33+
8bd5939c2f4a4b072e837e7870c13fe7d13824e5ff087ab534e4db4e90b7be9c
3434
)
3535
set(
3636
f7_sha256
37-
b7df528b1c66eb37b926c1336c89a63b3b784165f6f30bd0932a39b82469f0e9
37+
1cb946d8b7c6393c2c3ebe1f900b8de7a2885fe614c45d4ec32c9833084f2f26
3838
)
3939

4040
set(
@@ -45,6 +45,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
4545
libavdevice.so.58
4646
libavfilter.so.7
4747
libswscale.so.5
48+
libswresample.so.3
4849
)
4950
set(
5051
f5_library_file_names
@@ -54,6 +55,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
5455
libavdevice.so.59
5556
libavfilter.so.8
5657
libswscale.so.6
58+
libswresample.so.4
5759
)
5860
set(
5961
f6_library_file_names
@@ -63,6 +65,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
6365
libavdevice.so.60
6466
libavfilter.so.9
6567
libswscale.so.7
68+
libswresample.so.4
6669
)
6770
set(
6871
f7_library_file_names
@@ -72,6 +75,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
7275
libavdevice.so.61
7376
libavfilter.so.10
7477
libswscale.so.8
78+
libswresample.so.5
7579
)
7680
elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
7781
set(
@@ -80,19 +84,19 @@ elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
8084
)
8185
set(
8286
f4_sha256
83-
57622392af27bf479e18afb9d79ccf3deddaef153048b34ce518bd477c12d1e6
87+
f0335434529d9e19359eae0fe912dd9e747667534a1c92e662f5219a55dfad8c
8488
)
8589
set(
8690
f5_sha256
87-
7bc5a70ac66d45857372ccabdcd15594aa9a39a86bc396f92724435e5c4de54e
91+
cfc3449c9af6863731a431ce89e32c08c5f8ece94b306fb6b695828502a76166
8892
)
8993
set(
9094
f6_sha256
91-
0214733bc987c2deeabfc779331108c19964dcdac2c5e2db12960f0febcea2c4
95+
ec47b4783c342038e720e33b2fdfa55a9a490afb1cf37a26467733983688647e
9296
)
9397
set(
9498
f7_sha256
95-
c28925bb423383c0c37d9f3106fa7768c8733153a33154c8bedab8acf883366f
99+
48a4fc8ce098305cfd4a58f40889249c523ca3c285f66ba704b5bad0e3ada53a
96100
)
97101
set(
98102
f4_library_file_names
@@ -102,6 +106,7 @@ elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
102106
libavdevice.58.dylib
103107
libavfilter.7.dylib
104108
libswscale.5.dylib
109+
libswresample.3.dylib
105110
)
106111
set(
107112
f5_library_file_names
@@ -111,6 +116,7 @@ elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
111116
libavdevice.59.dylib
112117
libavfilter.8.dylib
113118
libswscale.6.dylib
119+
libswresample.4.dylib
114120
)
115121
set(
116122
f6_library_file_names
@@ -120,6 +126,7 @@ elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
120126
libavdevice.60.dylib
121127
libavfilter.9.dylib
122128
libswscale.7.dylib
129+
libswresample.4.dylib
123130
)
124131
set(
125132
f7_library_file_names
@@ -129,6 +136,7 @@ elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
129136
libavdevice.61.dylib
130137
libavfilter.10.dylib
131138
libswscale.8.dylib
139+
libswresample.5.dylib
132140
)
133141
else()
134142
message(

test/decoders/test_decoders.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
NASA_AUDIO,
2626
NASA_AUDIO_MP3,
2727
NASA_VIDEO,
28+
SINE_MONO_S32,
2829
)
2930

3031

@@ -940,7 +941,7 @@ def get_some_frames(decoder):
940941

941942

942943
class TestAudioDecoder:
943-
@pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
944+
@pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3, SINE_MONO_S32))
944945
def test_metadata(self, asset):
945946
decoder = AudioDecoder(asset.path)
946947
assert isinstance(decoder.metadata, AudioStreamMetadata)
@@ -955,6 +956,7 @@ def test_metadata(self, asset):
955956
)
956957
assert decoder.metadata.sample_rate == asset.sample_rate
957958
assert decoder.metadata.num_channels == asset.num_channels
959+
assert decoder.metadata.sample_format == asset.sample_format
958960

959961
@pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
960962
def test_error(self, asset):

test/decoders/test_metadata.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,6 +90,7 @@ def test_get_metadata(metadata_getter):
9090
)
9191
assert best_audio_stream_metadata.bit_rate == 128837
9292
assert best_audio_stream_metadata.codec == "aac"
93+
assert best_audio_stream_metadata.sample_format == "fltp"
9394

9495

9596
@pytest.mark.parametrize(
@@ -109,6 +110,7 @@ def test_get_metadata_audio_file(metadata_getter):
109110
)
110111
assert best_audio_stream_metadata.bit_rate == 64000
111112
assert best_audio_stream_metadata.codec == "mp3"
113+
assert best_audio_stream_metadata.sample_format == "fltp"
112114

113115

114116
@pytest.mark.parametrize(

test/resources/sine_mono_s32.wav

250 KB
Binary file not shown.

0 commit comments

Comments
 (0)