@@ -17,28 +17,28 @@ def load_with_torchcodec(
17
17
backend : Optional [str ] = None ,
18
18
) -> Tuple [torch .Tensor , int ]:
19
19
"""Load audio data from source using TorchCodec's AudioDecoder.
20
-
20
+
21
21
.. note::
22
-
22
+
23
23
This function supports the same API as :func:`~torchaudio.load`, and
24
24
relies on TorchCodec's decoding capabilities under the hood. It is
25
25
provided for convenience, but we do recommend that you port your code to
26
26
natively use ``torchcodec``'s ``AudioDecoder`` class for better
27
27
performance:
28
28
https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.decoders.AudioDecoder.
29
- In TorchAudio 2.9, :func:`~torchaudio.load` will be relying on
29
+ As of TorchAudio 2.9, :func:`~torchaudio.load` relies on
30
30
:func:`~torchaudio.load_with_torchcodec`. Note that some parameters of
31
31
:func:`~torchaudio.load`, like ``normalize``, ``buffer_size``, and
32
32
``backend``, are ignored by :func:`~torchaudio.load_with_torchcodec`.
33
-
34
-
33
+
34
+
35
35
Args:
36
36
uri (path-like object or file-like object):
37
37
Source of audio data. The following types are accepted:
38
-
38
+
39
39
* ``path-like``: File path or URL.
40
40
* ``file-like``: Object with ``read(size: int) -> bytes`` method.
41
-
41
+
42
42
frame_offset (int, optional):
43
43
Number of samples to skip before starting to read data.
44
44
num_frames (int, optional):
@@ -58,17 +58,17 @@ def load_with_torchcodec(
58
58
Not used by TorchCodec AudioDecoder. Provided for API compatibility.
59
59
backend (str or None, optional):
60
60
Not used by TorchCodec AudioDecoder. Provided for API compatibility.
61
-
61
+
62
62
Returns:
63
63
(torch.Tensor, int): Resulting Tensor and sample rate.
64
64
Always returns float32 tensors. If ``channels_first=True``, shape is
65
65
`[channel, time]`, otherwise `[time, channel]`.
66
-
66
+
67
67
Raises:
68
68
ImportError: If torchcodec is not available.
69
69
ValueError: If unsupported parameters are used.
70
70
RuntimeError: If TorchCodec fails to decode the audio.
71
-
71
+
72
72
Note:
73
73
- TorchCodec always returns normalized float32 samples, so the ``normalize``
74
74
parameter has no effect.
@@ -84,7 +84,7 @@ def load_with_torchcodec(
84
84
"TorchCodec is required for load_with_torchcodec. "
85
85
"Please install torchcodec to use this function."
86
86
) from e
87
-
87
+
88
88
# Parameter validation and warnings
89
89
if not normalize :
90
90
import warnings
@@ -94,71 +94,71 @@ def load_with_torchcodec(
94
94
UserWarning ,
95
95
stacklevel = 2
96
96
)
97
-
97
+
98
98
if buffer_size != 4096 :
99
99
import warnings
100
100
warnings .warn (
101
101
"The 'buffer_size' parameter is not used by TorchCodec AudioDecoder." ,
102
102
UserWarning ,
103
103
stacklevel = 2
104
104
)
105
-
105
+
106
106
if backend is not None :
107
107
import warnings
108
108
warnings .warn (
109
109
"The 'backend' parameter is not used by TorchCodec AudioDecoder." ,
110
110
UserWarning ,
111
111
stacklevel = 2
112
112
)
113
-
113
+
114
114
if format is not None :
115
115
import warnings
116
116
warnings .warn (
117
117
"The 'format' parameter is not supported by TorchCodec AudioDecoder." ,
118
118
UserWarning ,
119
119
stacklevel = 2
120
120
)
121
-
121
+
122
122
# Create AudioDecoder
123
123
try :
124
124
decoder = AudioDecoder (uri )
125
125
except Exception as e :
126
126
raise RuntimeError (f"Failed to create AudioDecoder for { uri } : { e } " ) from e
127
-
127
+
128
128
# Get sample rate from metadata
129
129
sample_rate = decoder .metadata .sample_rate
130
130
if sample_rate is None :
131
131
raise RuntimeError ("Unable to determine sample rate from audio metadata" )
132
-
132
+
133
133
# Decode the entire file first, then subsample manually
134
134
# This is the simplest approach since torchcodec uses time-based indexing
135
135
try :
136
136
audio_samples = decoder .get_all_samples ()
137
137
except Exception as e :
138
138
raise RuntimeError (f"Failed to decode audio samples: { e } " ) from e
139
-
139
+
140
140
data = audio_samples .data
141
-
141
+
142
142
# Apply frame_offset and num_frames (which are actually sample offsets)
143
143
if frame_offset > 0 :
144
144
if frame_offset >= data .shape [1 ]:
145
145
# Return empty tensor if offset is beyond available data
146
146
empty_shape = (data .shape [0 ], 0 ) if channels_first else (0 , data .shape [0 ])
147
147
return torch .zeros (empty_shape , dtype = torch .float32 ), sample_rate
148
148
data = data [:, frame_offset :]
149
-
149
+
150
150
if num_frames == 0 :
151
151
# Return empty tensor if num_frames is 0
152
152
empty_shape = (data .shape [0 ], 0 ) if channels_first else (0 , data .shape [0 ])
153
153
return torch .zeros (empty_shape , dtype = torch .float32 ), sample_rate
154
154
elif num_frames > 0 :
155
155
data = data [:, :num_frames ]
156
-
156
+
157
157
# TorchCodec returns data in [channel, time] format by default
158
158
# Handle channels_first parameter
159
159
if not channels_first :
160
160
data = data .transpose (0 , 1 ) # [channel, time] -> [time, channel]
161
-
161
+
162
162
return data , sample_rate
163
163
164
164
@@ -177,70 +177,70 @@ def save_with_torchcodec(
177
177
"""Save audio data to file using TorchCodec's AudioEncoder.
178
178
179
179
.. note::
180
-
180
+
181
181
This function supports the same API as :func:`~torchaudio.save`, and
182
182
relies on TorchCodec's encoding capabilities under the hood. It is
183
183
provided for convenience, but we do recommend that you port your code to
184
184
natively use ``torchcodec``'s ``AudioEncoder`` class for better
185
185
performance:
186
186
https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.encoders.AudioEncoder.
187
- In TorchAudio 2.9, :func:`~torchaudio.save` will be relying on
187
+ As of TorchAudio 2.9, :func:`~torchaudio.save` relies on
188
188
:func:`~torchaudio.save_with_torchcodec`. Note that some parameters of
189
189
:func:`~torchaudio.save`, like ``format``, ``encoding``,
190
190
``bits_per_sample``, ``buffer_size``, and ``backend``, are ignored by
191
191
:func:`~torchaudio.save_with_torchcodec`.
192
-
192
+
193
193
This function provides a TorchCodec-based alternative to torchaudio.save
194
194
with the same API. TorchCodec's AudioEncoder provides efficient encoding
195
195
with FFmpeg under the hood.
196
-
196
+
197
197
Args:
198
198
uri (path-like object):
199
199
Path to save the audio file. The file extension determines the format.
200
-
200
+
201
201
src (torch.Tensor):
202
202
Audio data to save. Must be a 1D or 2D tensor with float32 values
203
203
in the range [-1, 1]. If 2D, shape should be [channel, time] when
204
204
channels_first=True, or [time, channel] when channels_first=False.
205
-
205
+
206
206
sample_rate (int):
207
207
Sample rate of the audio data.
208
-
208
+
209
209
channels_first (bool, optional):
210
210
Indicates whether the input tensor has channels as the first dimension.
211
211
If True, expects [channel, time]. If False, expects [time, channel].
212
212
Default: True.
213
-
213
+
214
214
format (str or None, optional):
215
215
Audio format hint. Not used by TorchCodec (format is determined by
216
216
file extension). A warning is issued if provided.
217
217
Default: None.
218
-
218
+
219
219
encoding (str or None, optional):
220
220
Audio encoding. Not fully supported by TorchCodec AudioEncoder.
221
221
A warning is issued if provided. Default: None.
222
-
222
+
223
223
bits_per_sample (int or None, optional):
224
224
Bits per sample. Not directly supported by TorchCodec AudioEncoder.
225
225
A warning is issued if provided. Default: None.
226
-
226
+
227
227
buffer_size (int, optional):
228
228
Not used by TorchCodec AudioEncoder. Provided for API compatibility.
229
229
A warning is issued if not default value. Default: 4096.
230
-
230
+
231
231
backend (str or None, optional):
232
232
Not used by TorchCodec AudioEncoder. Provided for API compatibility.
233
233
A warning is issued if provided. Default: None.
234
-
234
+
235
235
compression (float, int or None, optional):
236
236
Compression level or bit rate. Maps to bit_rate parameter in
237
237
TorchCodec AudioEncoder. Default: None.
238
-
238
+
239
239
Raises:
240
240
ImportError: If torchcodec is not available.
241
241
ValueError: If input parameters are invalid.
242
242
RuntimeError: If TorchCodec fails to encode the audio.
243
-
243
+
244
244
Note:
245
245
- TorchCodec AudioEncoder expects float32 samples in [-1, 1] range.
246
246
- Some parameters (format, encoding, bits_per_sample, buffer_size, backend)
@@ -256,7 +256,7 @@ def save_with_torchcodec(
256
256
"TorchCodec is required for save_with_torchcodec. "
257
257
"Please install torchcodec to use this function."
258
258
) from e
259
-
259
+
260
260
# Parameter validation and warnings
261
261
if format is not None :
262
262
import warnings
@@ -266,49 +266,49 @@ def save_with_torchcodec(
266
266
UserWarning ,
267
267
stacklevel = 2
268
268
)
269
-
269
+
270
270
if encoding is not None :
271
271
import warnings
272
272
warnings .warn (
273
273
"The 'encoding' parameter is not fully supported by TorchCodec AudioEncoder." ,
274
274
UserWarning ,
275
275
stacklevel = 2
276
276
)
277
-
277
+
278
278
if bits_per_sample is not None :
279
279
import warnings
280
280
warnings .warn (
281
281
"The 'bits_per_sample' parameter is not directly supported by TorchCodec AudioEncoder." ,
282
282
UserWarning ,
283
283
stacklevel = 2
284
284
)
285
-
285
+
286
286
if buffer_size != 4096 :
287
287
import warnings
288
288
warnings .warn (
289
289
"The 'buffer_size' parameter is not used by TorchCodec AudioEncoder." ,
290
290
UserWarning ,
291
291
stacklevel = 2
292
292
)
293
-
293
+
294
294
if backend is not None :
295
295
import warnings
296
296
warnings .warn (
297
297
"The 'backend' parameter is not used by TorchCodec AudioEncoder." ,
298
298
UserWarning ,
299
299
stacklevel = 2
300
300
)
301
-
301
+
302
302
# Input validation
303
303
if not isinstance (src , torch .Tensor ):
304
304
raise ValueError (f"Expected src to be a torch.Tensor, got { type (src )} " )
305
-
305
+
306
306
if src .dtype != torch .float32 :
307
307
src = src .float ()
308
-
308
+
309
309
if sample_rate <= 0 :
310
310
raise ValueError (f"sample_rate must be positive, got { sample_rate } " )
311
-
311
+
312
312
# Handle tensor shape and channels_first
313
313
if src .ndim == 1 :
314
314
# Convert to 2D: [1, time] for channels_first=True
@@ -324,13 +324,13 @@ def save_with_torchcodec(
324
324
data = src .transpose (0 , 1 ) # [time, channel] -> [channel, time]
325
325
else :
326
326
raise ValueError (f"Expected 1D or 2D tensor, got { src .ndim } D tensor" )
327
-
327
+
328
328
# Create AudioEncoder
329
329
try :
330
330
encoder = AudioEncoder (data , sample_rate = sample_rate )
331
331
except Exception as e :
332
332
raise RuntimeError (f"Failed to create AudioEncoder: { e } " ) from e
333
-
333
+
334
334
# Determine bit_rate from compression parameter
335
335
bit_rate = None
336
336
if compression is not None :
@@ -344,7 +344,7 @@ def save_with_torchcodec(
344
344
UserWarning ,
345
345
stacklevel = 2
346
346
)
347
-
347
+
348
348
# Save to file
349
349
try :
350
350
encoder .to_file (uri , bit_rate = bit_rate )
0 commit comments