Skip to content

Commit 93f582c

Browse files
Let torchaudio.load() and torchaudio.save() rely on load_with_torchcodec() and save_with_torchcodec(). (#4039)
Co-authored-by: Nicolas Hug <[email protected]> Co-authored-by: Nicolas Hug <[email protected]>
1 parent 02351a6 commit 93f582c

File tree

7 files changed

+307
-81
lines changed

7 files changed

+307
-81
lines changed

.github/scripts/unittest-linux/install.sh

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ case $GPU_ARCH_TYPE in
4040
;;
4141
esac
4242
PYTORCH_WHEEL_INDEX="https://download.pytorch.org/whl/${UPLOAD_CHANNEL}/${GPU_ARCH_ID}"
43-
pip install --progress-bar=off --pre torch torchcodec --index-url="${PYTORCH_WHEEL_INDEX}"
43+
pip install --progress-bar=off --pre torch --index-url="${PYTORCH_WHEEL_INDEX}"
4444

4545

4646
# 2. Install torchaudio
@@ -54,6 +54,5 @@ pip install . -v --no-build-isolation
5454
printf "* Installing test tools\n"
5555
# On this CI, for whatever reason, we're only able to install ffmpeg 4.
5656
conda install -y "ffmpeg<5"
57-
python -c "import torch; import torchaudio; import torchcodec; print(torch.__version__, torchaudio.__version__, torchcodec.__version__)"
5857

5958
pip3 install parameterized requests coverage pytest pytest-cov scipy numpy expecttest

.github/workflows/build_docs.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ jobs:
6868
6969
GPU_ARCH_ID=cu126 # This is hard-coded and must be consistent with gpu-arch-version.
7070
PYTORCH_WHEEL_INDEX="https://download.pytorch.org/whl/${CHANNEL}/${GPU_ARCH_ID}"
71-
pip install --progress-bar=off --pre torch --index-url="${PYTORCH_WHEEL_INDEX}"
71+
pip install --progress-bar=off --pre torch torchcodec --index-url="${PYTORCH_WHEEL_INDEX}"
7272
7373
echo "::endgroup::"
7474
echo "::group::Install TorchAudio"

src/torchaudio/__init__.py

Lines changed: 169 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
from torchaudio._internal.module_utils import dropping_io_support, dropping_class_io_support
2+
from typing import Union, BinaryIO, Optional, Tuple
3+
import os
4+
import torch
25

36
# Initialize extension and backend first
47
from . import _extension # noqa # usort: skip
@@ -7,8 +10,6 @@
710
get_audio_backend as _get_audio_backend,
811
info as _info,
912
list_audio_backends as _list_audio_backends,
10-
load,
11-
save,
1213
set_audio_backend as _set_audio_backend,
1314
)
1415
from ._torchcodec import load_with_torchcodec, save_with_torchcodec
@@ -41,6 +42,172 @@
4142
pass
4243

4344

45+
def load(
    uri: Union[BinaryIO, str, os.PathLike],
    frame_offset: int = 0,
    num_frames: int = -1,
    normalize: bool = True,
    channels_first: bool = True,
    format: Optional[str] = None,
    buffer_size: int = 4096,
    backend: Optional[str] = None,
) -> Tuple[torch.Tensor, int]:
    """Load audio data from a source by delegating to ``load_with_torchcodec``.

    .. note::

        As of TorchAudio 2.9, this function relies on TorchCodec's decoding
        capabilities under the hood. It is kept for convenience, but porting
        your code to use ``torchcodec``'s ``AudioDecoder`` class natively is
        recommended for better performance:
        https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.decoders.AudioDecoder.
        Because decoding is handled by TorchCodec, the parameters
        ``normalize``, ``buffer_size``, and ``backend`` are ignored and are
        accepted only for backwards compatibility.

    Args:
        uri (path-like object or file-like object):
            Source of audio data. The following types are accepted:

            * ``path-like``: File path or URL.
            * ``file-like``: Object with ``read(size: int) -> bytes`` method.

        frame_offset (int, optional):
            Number of samples to skip before starting to read data.
        num_frames (int, optional):
            Maximum number of samples to read. ``-1`` reads all the remaining
            samples, starting from ``frame_offset``.
        normalize (bool, optional):
            Ignored. TorchCodec always returns normalized float32 samples, and
            a warning is issued if this is set to ``False``.
            Default: ``True``.
        channels_first (bool, optional):
            When ``True``, the returned Tensor has dimension `[channel, time]`.
            Otherwise, the returned Tensor's dimension is `[time, channel]`.
        format (str or None, optional):
            Format hint for the decoder. May not be supported by all
            TorchCodec decoders. (Default: ``None``)
        buffer_size (int, optional):
            Not used by TorchCodec AudioDecoder. Provided for API compatibility.
        backend (str or None, optional):
            Not used by TorchCodec AudioDecoder. Provided for API compatibility.

    Returns:
        (torch.Tensor, int): Resulting Tensor and sample rate.
        The Tensor is always float32. Its shape is `[channel, time]` when
        ``channels_first=True`` and `[time, channel]` otherwise.

    Raises:
        ImportError: If torchcodec is not available.
        ValueError: If unsupported parameters are used.
        RuntimeError: If TorchCodec fails to decode the audio.

    Note:
        - TorchCodec always returns normalized float32 samples, so the
          ``normalize`` parameter has no effect.
        - The ``buffer_size`` and ``backend`` parameters are ignored.
        - Not all audio formats supported by torchaudio backends may be
          supported by TorchCodec.
    """
    # Every option is forwarded untouched, including the ones TorchCodec
    # ignores, so the delegate decides how to treat them.
    decode_options = dict(
        frame_offset=frame_offset,
        num_frames=num_frames,
        normalize=normalize,
        channels_first=channels_first,
        format=format,
        buffer_size=buffer_size,
        backend=backend,
    )
    return load_with_torchcodec(uri, **decode_options)
122+
123+
def save(
    uri: Union[str, os.PathLike],
    src: torch.Tensor,
    sample_rate: int,
    channels_first: bool = True,
    format: Optional[str] = None,
    encoding: Optional[str] = None,
    bits_per_sample: Optional[int] = None,
    buffer_size: int = 4096,
    backend: Optional[str] = None,
    compression: Optional[Union[float, int]] = None,
) -> None:
    """Save audio data to a file by delegating to ``save_with_torchcodec``.

    .. note::

        As of TorchAudio 2.9, this function relies on TorchCodec's encoding
        capabilities under the hood. It is kept for convenience, but porting
        your code to use ``torchcodec``'s ``AudioEncoder`` class natively is
        recommended for better performance:
        https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.encoders.AudioEncoder.
        Because encoding is handled by TorchCodec, the parameters ``format``,
        ``encoding``, ``bits_per_sample``, ``buffer_size``, and ``backend``
        are ignored and are accepted only for backwards compatibility.

    Args:
        uri (path-like object):
            Path to save the audio file. The file extension determines the
            format.

        src (torch.Tensor):
            Audio data to save. Must be a 1D or 2D tensor with float32 values
            in the range [-1, 1]. If 2D, shape should be [channel, time] when
            ``channels_first=True``, or [time, channel] when
            ``channels_first=False``.

        sample_rate (int):
            Sample rate of the audio data.

        channels_first (bool, optional):
            Indicates whether the input tensor has channels as the first
            dimension. If True, expects [channel, time]. If False, expects
            [time, channel]. Default: True.

        format (str or None, optional):
            Audio format hint. Not used by TorchCodec (format is determined by
            file extension). A warning is issued if provided.
            Default: None.

        encoding (str or None, optional):
            Audio encoding. Not fully supported by TorchCodec AudioEncoder.
            A warning is issued if provided. Default: None.

        bits_per_sample (int or None, optional):
            Bits per sample. Not directly supported by TorchCodec AudioEncoder.
            A warning is issued if provided. Default: None.

        buffer_size (int, optional):
            Not used by TorchCodec AudioEncoder. Provided for API compatibility.
            A warning is issued if not default value. Default: 4096.

        backend (str or None, optional):
            Not used by TorchCodec AudioEncoder. Provided for API compatibility.
            A warning is issued if provided. Default: None.

        compression (float, int or None, optional):
            Compression level or bit rate. Maps to the ``bit_rate`` parameter
            in TorchCodec AudioEncoder. Default: None.

    Raises:
        ImportError: If torchcodec is not available.
        ValueError: If input parameters are invalid.
        RuntimeError: If TorchCodec fails to encode the audio.

    Note:
        - TorchCodec AudioEncoder expects float32 samples in [-1, 1] range.
        - Some parameters (format, encoding, bits_per_sample, buffer_size,
          backend) are not used by TorchCodec but are provided for API
          compatibility.
        - The output format is determined by the file extension in the uri.
        - TorchCodec uses FFmpeg under the hood for encoding.
    """
    # Every option is forwarded untouched, including the ones TorchCodec
    # ignores, so the delegate decides how to treat them.
    encode_options = dict(
        channels_first=channels_first,
        format=format,
        encoding=encoding,
        bits_per_sample=bits_per_sample,
        buffer_size=buffer_size,
        backend=backend,
        compression=compression,
    )
    return save_with_torchcodec(uri, src, sample_rate, **encode_options)
210+
44211
__all__ = [
45212
"AudioMetaData",
46213
"load",

test/conftest.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import sys
2+
from pathlib import Path
3+
4+
# Note: [TorchCodec test dependency mocking hack]
5+
# We are adding the `test/` directory to the system path. This causes the
6+
# `test/torchcodec` folder to be importable, and in particular, this makes it
7+
# possible to mock torchcodec utilities. E.g. executing:
8+
#
9+
# ```
10+
# from torchcodec.decoders import AudioDecoder
11+
# ```
12+
# directly or indirectly when running the tests will effectively be loading the
13+
# mocked `AudioDecoder` implemented in `test/torchcodec/decoders.py`, which
14+
# relies on scipy instead of relying on torchcodec.
15+
#
16+
# So whenever `torchaudio.load()` is called from within the tests, it's the
17+
# mocked scipy `AudioDecoder` that gets used. Ultimately, this allows us *not*
18+
# to add torchcodec as a test dependency of torchaudio: we can just rely on
19+
# scipy.
20+
#
21+
# This is VERY hacky and ideally we should implement a more robust way to mock
22+
# torchcodec.
23+
sys.path.append(str(Path(__file__).parent.resolve()))

0 commit comments

Comments
 (0)