1
+ """TorchCodec integration for TorchAudio."""
2
+
3
+ import os
4
+ from typing import BinaryIO , Optional , Tuple , Union
5
+
6
+ import torch
7
+
8
+
9
def load_with_torchcodec(
    uri: Union[BinaryIO, str, os.PathLike],
    frame_offset: int = 0,
    num_frames: int = -1,
    normalize: bool = True,
    channels_first: bool = True,
    format: Optional[str] = None,
    buffer_size: int = 4096,
    backend: Optional[str] = None,
) -> Tuple[torch.Tensor, int]:
    """Load audio data from source using TorchCodec's AudioDecoder.

    .. note::

        This function supports the same API as ``torchaudio.load()``, and relies
        on TorchCodec's decoding capabilities under the hood. It is provided for
        convenience, but we do recommend that you port your code to natively use
        ``torchcodec``'s ``AudioDecoder`` class for better performance:
        https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.decoders.AudioDecoder.
        In TorchAudio 2.9, ``torchaudio.load()`` will be relying on
        ``load_with_torchcodec``. Note that some parameters of
        ``torchaudio.load()``, like ``normalize``, ``format``, ``buffer_size``,
        and ``backend``, are ignored by ``load_with_torchcodec``.

    Args:
        uri (path-like object or file-like object):
            Source of audio data. The following types are accepted:

            * ``path-like``: File path or URL.
            * ``file-like``: Object with ``read(size: int) -> bytes`` method.

        frame_offset (int, optional):
            Number of samples to skip before start reading data. Values less
            than or equal to ``0`` read from the beginning. An offset at or
            beyond the end of the decoded audio yields an empty Tensor.
        num_frames (int, optional):
            Maximum number of samples to read, starting from ``frame_offset``.
            ``0`` yields an empty Tensor. Any negative value (``-1`` by
            default) reads all the remaining samples.
        normalize (bool, optional):
            TorchCodec always returns normalized float32 samples. This parameter
            is ignored and a warning is issued if set to False.
            Default: ``True``.
        channels_first (bool, optional):
            When True, the returned Tensor has dimension ``[channel, time]``.
            Otherwise, the returned Tensor's dimension is ``[time, channel]``.
        format (str or None, optional):
            Not used by TorchCodec AudioDecoder. Provided for API
            compatibility; a warning is issued if it is not ``None``.
            (Default: ``None``)
        buffer_size (int, optional):
            Not used by TorchCodec AudioDecoder. Provided for API
            compatibility; a warning is issued if it differs from ``4096``.
        backend (str or None, optional):
            Not used by TorchCodec AudioDecoder. Provided for API
            compatibility; a warning is issued if it is not ``None``.

    Returns:
        (torch.Tensor, int): Resulting Tensor and sample rate.
        If ``channels_first=True``, shape is ``[channel, time]``, otherwise
        ``[time, channel]``. Empty results have the same dtype and device as
        the decoded samples.

    Raises:
        ImportError: If torchcodec is not available.
        RuntimeError: If the decoder cannot be created, the sample rate is
            missing from the metadata, or decoding fails.

    Warns:
        UserWarning: For every ignored argument set to a non-default value
            (``normalize``, ``buffer_size``, ``backend``, ``format``).

    Note:
        - The whole stream is decoded first and ``frame_offset`` /
          ``num_frames`` are applied afterwards by slicing, so a non-empty
          result is a view of the fully decoded Tensor.
        - Not all audio formats supported by torchaudio backends may be supported
          by TorchCodec.
    """
    # Import lazily so a missing optional dependency only fails when this
    # function is actually called, and with an actionable message.
    try:
        from torchcodec.decoders import AudioDecoder
    except ImportError as e:
        raise ImportError(
            "TorchCodec is required for load_with_torchcodec. "
            "Please install torchcodec to use this function."
        ) from e

    import warnings

    # (condition, message) pairs for arguments that exist only for API
    # compatibility with ``torchaudio.load()``; order matches emission order.
    ignored_argument_checks = (
        (
            not normalize,
            "TorchCodec AudioDecoder always returns normalized float32 samples. "
            "The 'normalize=False' parameter is ignored.",
        ),
        (
            buffer_size != 4096,
            "The 'buffer_size' parameter is not used by TorchCodec AudioDecoder.",
        ),
        (
            backend is not None,
            "The 'backend' parameter is not used by TorchCodec AudioDecoder.",
        ),
        (
            format is not None,
            "The 'format' parameter is not supported by TorchCodec AudioDecoder.",
        ),
    )
    for is_ignored, message in ignored_argument_checks:
        if is_ignored:
            # stacklevel=2 attributes the warning to the caller's line.
            warnings.warn(message, UserWarning, stacklevel=2)

    # Any failure while opening the source is surfaced as RuntimeError, with
    # the original exception chained for debugging.
    try:
        decoder = AudioDecoder(uri)
    except Exception as e:
        raise RuntimeError(f"Failed to create AudioDecoder for {uri}: {e}") from e

    sample_rate = decoder.metadata.sample_rate
    if sample_rate is None:
        raise RuntimeError("Unable to determine sample rate from audio metadata")

    # Decode the entire file first, then subsample manually. This is the
    # simplest approach since torchcodec uses time-based indexing.
    try:
        audio_samples = decoder.get_all_samples()
    except Exception as e:
        raise RuntimeError(f"Failed to decode audio samples: {e}") from e

    # The slicing below treats the decoded data as [channel, time].
    data = audio_samples.data
    num_channels = data.shape[0]
    empty_shape = (num_channels, 0) if channels_first else (0, num_channels)

    if frame_offset > 0:
        if frame_offset >= data.shape[1]:
            # Offset is past the end. ``new_zeros`` keeps the dtype and device
            # of the decoded data instead of following the global defaults.
            return data.new_zeros(empty_shape), sample_rate
        data = data[:, frame_offset:]

    if num_frames == 0:
        return data.new_zeros(empty_shape), sample_rate
    if num_frames > 0:
        data = data[:, :num_frames]

    if not channels_first:
        data = data.transpose(0, 1)  # [channel, time] -> [time, channel]

    return data, sample_rate
0 commit comments