|
1 | 1 | from torchaudio._internal.module_utils import dropping_io_support, dropping_class_io_support
|
| 2 | +from typing import Union, BinaryIO, Optional, Tuple |
| 3 | +import os |
| 4 | +import torch |
2 | 5 |
|
3 | 6 | # Initialize extension and backend first
|
4 | 7 | from . import _extension # noqa # usort: skip
|
|
7 | 10 | get_audio_backend as _get_audio_backend,
|
8 | 11 | info as _info,
|
9 | 12 | list_audio_backends as _list_audio_backends,
|
10 |
| - load, |
11 |
| - save, |
12 | 13 | set_audio_backend as _set_audio_backend,
|
13 | 14 | )
|
14 | 15 | from ._torchcodec import load_with_torchcodec, save_with_torchcodec
|
|
41 | 42 | pass
|
42 | 43 |
|
43 | 44 |
|
def load(
    uri: Union[BinaryIO, str, os.PathLike],
    frame_offset: int = 0,
    num_frames: int = -1,
    normalize: bool = True,
    channels_first: bool = True,
    format: Optional[str] = None,
    buffer_size: int = 4096,
    backend: Optional[str] = None,
) -> Tuple[torch.Tensor, int]:
    """Decode audio from ``uri`` through TorchCodec's AudioDecoder.

    This is a thin compatibility wrapper: every argument is handed unchanged
    to ``load_with_torchcodec`` and its result is returned as-is.

    .. note::

        Starting with TorchAudio 2.9 the decoding is performed by TorchCodec.
        The function is kept for convenience; for better performance we
        recommend migrating to ``torchcodec``'s ``AudioDecoder`` class
        directly:
        https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.decoders.AudioDecoder.
        Since TorchCodec does the work, ``normalize``, ``buffer_size`` and
        ``backend`` have no effect and exist only for backwards
        compatibility.

    Args:
        uri (path-like object or file-like object):
            Where to read the audio from. Accepted kinds:

            * ``path-like``: a file path or URL.
            * ``file-like``: an object exposing ``read(size: int) -> bytes``.

        frame_offset (int, optional):
            How many samples to skip before reading begins.
        num_frames (int, optional):
            Upper bound on the number of samples to read. ``-1`` reads
            everything that remains after ``frame_offset``.
        normalize (bool, optional):
            Ignored. TorchCodec always produces normalized float32 samples;
            passing ``False`` triggers a warning. Default: ``True``.
        channels_first (bool, optional):
            If True the result is laid out as ``[channel, time]``, otherwise
            as ``[time, channel]``.
        format (str or None, optional):
            Hint about the container/codec for the decoder. Not every
            TorchCodec decoder honours it. (Default: ``None``)
        buffer_size (int, optional):
            Unused by TorchCodec's AudioDecoder; kept for API compatibility.
        backend (str or None, optional):
            Unused by TorchCodec's AudioDecoder; kept for API compatibility.

    Returns:
        (torch.Tensor, int): The decoded waveform and its sample rate. The
        tensor is always float32, shaped ``[channel, time]`` when
        ``channels_first=True`` and ``[time, channel]`` otherwise.

    Raises:
        ImportError: If torchcodec is not available.
        ValueError: If unsupported parameters are used.
        RuntimeError: If TorchCodec fails to decode the audio.

    Note:
        - ``normalize`` has no effect because TorchCodec always yields
          normalized float32 samples.
        - ``buffer_size`` and ``backend`` are ignored.
        - Some formats handled by the former torchaudio backends may not be
          supported by TorchCodec.
    """
    # Gather the keyword options once so the delegation below stays a single,
    # easy-to-scan call; names and values are forwarded untouched.
    decode_options = {
        "frame_offset": frame_offset,
        "num_frames": num_frames,
        "normalize": normalize,
        "channels_first": channels_first,
        "format": format,
        "buffer_size": buffer_size,
        "backend": backend,
    }
    return load_with_torchcodec(uri, **decode_options)
| 122 | + |
def save(
    uri: Union[str, os.PathLike],
    src: torch.Tensor,
    sample_rate: int,
    channels_first: bool = True,
    format: Optional[str] = None,
    encoding: Optional[str] = None,
    bits_per_sample: Optional[int] = None,
    buffer_size: int = 4096,
    backend: Optional[str] = None,
    compression: Optional[Union[float, int]] = None,
) -> None:
    """Write audio to a file through TorchCodec's AudioEncoder.

    This is a thin compatibility wrapper: every argument is handed unchanged
    to ``save_with_torchcodec``.

    .. note::

        Starting with TorchAudio 2.9 the encoding is performed by TorchCodec.
        The function is kept for convenience; for better performance we
        recommend migrating to ``torchcodec``'s ``AudioEncoder`` class
        directly:
        https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.encoders.AudioEncoder.
        Since TorchCodec does the work, ``format``, ``encoding``,
        ``bits_per_sample``, ``buffer_size`` and ``backend`` have no effect
        and exist only for backwards compatibility.

    Args:
        uri (path-like object):
            Destination path. The output format follows from the file
            extension.

        src (torch.Tensor):
            Samples to write: a 1D or 2D float32 tensor with values in
            [-1, 1]. A 2D tensor is read as [channel, time] when
            ``channels_first=True`` and as [time, channel] otherwise.

        sample_rate (int):
            Sample rate of ``src``.

        channels_first (bool, optional):
            Whether the first dimension of ``src`` is the channel dimension.
            True means [channel, time]; False means [time, channel].
            Default: True.

        format (str or None, optional):
            Format hint. Unused by TorchCodec, which picks the format from
            the file extension; a warning is issued if given. Default: None.

        encoding (str or None, optional):
            Audio encoding. Not fully supported by TorchCodec's AudioEncoder;
            a warning is issued if given. Default: None.

        bits_per_sample (int or None, optional):
            Bits per sample. Not directly supported by TorchCodec's
            AudioEncoder; a warning is issued if given. Default: None.

        buffer_size (int, optional):
            Unused by TorchCodec's AudioEncoder; kept for API compatibility.
            A warning is issued for non-default values. Default: 4096.

        backend (str or None, optional):
            Unused by TorchCodec's AudioEncoder; kept for API compatibility.
            A warning is issued if given. Default: None.

        compression (float, int or None, optional):
            Compression level or bit rate; corresponds to the ``bit_rate``
            parameter of TorchCodec's AudioEncoder. Default: None.

    Raises:
        ImportError: If torchcodec is not available.
        ValueError: If input parameters are invalid.
        RuntimeError: If TorchCodec fails to encode the audio.

    Note:
        - TorchCodec's AudioEncoder expects float32 samples in [-1, 1].
        - ``format``, ``encoding``, ``bits_per_sample``, ``buffer_size`` and
          ``backend`` are accepted for API compatibility but not used by
          TorchCodec.
        - The file extension of ``uri`` decides the output format.
        - Encoding is carried out by FFmpeg underneath TorchCodec.
    """
    # Gather the keyword options once so the delegation below stays a single,
    # easy-to-scan call; names and values are forwarded untouched.
    encode_options = {
        "channels_first": channels_first,
        "format": format,
        "encoding": encoding,
        "bits_per_sample": bits_per_sample,
        "buffer_size": buffer_size,
        "backend": backend,
        "compression": compression,
    }
    return save_with_torchcodec(uri, src, sample_rate, **encode_options)
| 210 | + |
44 | 211 | __all__ = [
|
45 | 212 | "AudioMetaData",
|
46 | 213 | "load",
|
|
0 commit comments