11import os
2- import warnings
32from typing import Tuple , Optional
43
54import torch
@@ -152,26 +151,6 @@ def load(
152151 filepath , frame_offset , num_frames , normalize , channels_first , format )
153152
154153
155- @torch .jit .unused
156- def _save (
157- filepath : str ,
158- src : torch .Tensor ,
159- sample_rate : int ,
160- channels_first : bool = True ,
161- compression : Optional [float ] = None ,
162- format : Optional [str ] = None ,
163- dtype : Optional [str ] = None ,
164- ):
165- if hasattr (filepath , 'write' ):
166- if format is None :
167- raise RuntimeError ('`format` is required when saving to file object.' )
168- torchaudio ._torchaudio .save_audio_fileobj (
169- filepath , src , sample_rate , channels_first , compression , format , dtype )
170- else :
171- torch .ops .torchaudio .sox_io_save_audio_file (
172- os .fspath (filepath ), src , sample_rate , channels_first , compression , format , dtype )
173-
174-
175154@_mod_utils .requires_module ('torchaudio._torchaudio' )
176155def save (
177156 filepath : str ,
@@ -180,30 +159,11 @@ def save(
180159 channels_first : bool = True ,
181160 compression : Optional [float ] = None ,
182161 format : Optional [str ] = None ,
183- dtype : Optional [str ] = None ,
162+ encoding : Optional [str ] = None ,
163+ bits_per_sample : Optional [int ] = None ,
184164):
185165 """Save audio data to file.
186166
187- Note:
188- Supported formats are;
189-
190- * WAV, AMB
191-
192- * 32-bit floating-point
193- * 32-bit signed integer
194- * 16-bit signed integer
195- * 8-bit unsigned integer
196-
197- * MP3
198- * FLAC
199- * OGG/VORBIS
200- * SPHERE
201- * AMR-NB
202-
203- To save ``MP3``, ``FLAC``, ``OGG/VORBIS``, and other codecs ``libsox`` does not
204- handle natively, your installation of ``torchaudio`` has to be linked to ``libsox``
205- and corresponding codec libraries such as ``libmad`` or ``libmp3lame`` etc.
206-
207167 Args:
208168 filepath (str or pathlib.Path): Path to save file.
209169 This function also handles ``pathlib.Path`` objects, but is annotated
@@ -215,32 +175,137 @@ def save(
215175 compression (Optional[float]): Used for formats other than WAV.
216176 This corresponds to ``-C`` option of ``sox`` command.
217177
218- * | ``MP3``: Either bitrate (in ``kbps``) with quality factor, such as ``128.2``, or
219- | VBR encoding with quality factor such as ``-4.2``. Default: ``-4.5``.
220- * | ``FLAC``: compression level. Whole number from ``0`` to ``8``.
221- | ``8`` is default and highest compression.
222- * | ``OGG/VORBIS``: number from ``-1`` to ``10``; ``-1`` is the highest compression
223- | and lowest quality. Default: ``3``.
178+ ``"mp3"``
179+ Either bitrate (in ``kbps``) with quality factor, such as ``128.2``, or
180+ VBR encoding with quality factor such as ``-4.2``. Default: ``-4.5``.
181+
182+ ``"flac"``
183+ Whole number from ``0`` to ``8``. ``8`` is default and highest compression.
184+
185+ ``"ogg"``, ``"vorbis"``
186+ Number from ``-1`` to ``10``; ``-1`` is the highest compression
187+ and lowest quality. Default: ``3``.
224188
225189 See the detail at http://sox.sourceforge.net/soxformat.html.
226- format (str, optional): Output audio format.
227- This is required when the output audio format cannot be infered from
228- ``filepath``, (such as file extension or ``name`` attribute of the given file object).
229- dtype (str, optional): Output tensor dtype.
230- Valid values: ``"uint8", "int16", "int32", "float32", "float64", None``
231- ``dtype=None`` means no conversion is performed.
232- ``dtype`` parameter is only effective for ``float32`` Tensor.
190+ format (str, optional): Override the audio format.
191+ When ``filepath`` argument is path-like object, audio format is inferred from
192+ file extension. If file extension is missing or different, you can specify the
193+ correct format with this argument.
194+
195+ When ``filepath`` argument is file-like object, this argument is required.
196+
197+ Valid values are ``"wav"``, ``"mp3"``, ``"ogg"``, ``"vorbis"``, ``"amr-nb"``,
198+ ``"amb"``, ``"flac"`` and ``"sph"``.
199+ encoding (str, optional): Changes the encoding for the supported formats.
200+ This argument is effective only for supported formats, such as ``"wav"``, ``"amb"``
201+ and ``"sph"``. Valid values are;
202+
203+ - ``"PCM_S"`` (signed integer Linear PCM)
204+ - ``"PCM_U"`` (unsigned integer Linear PCM)
205+ - ``"PCM_F"`` (floating point PCM)
206+ - ``"ULAW"`` (mu-law)
207+ - ``"ALAW"`` (a-law)
208+
209+ Default values
210+ If not provided, the default value is picked based on ``format`` and ``bits_per_sample``.
211+
212+ ``"wav"``, ``"amb"``
213+ - | If both ``encoding`` and ``bits_per_sample`` are not provided, the ``dtype`` of the
214+ | Tensor is used to determine the default value.
215+ - ``"PCM_U"`` if dtype is ``uint8``
216+ - ``"PCM_S"`` if dtype is ``int16`` or ``int32``
217+ - ``"PCM_F"`` if dtype is ``float32``
218+
219+ - ``"PCM_U"`` if ``bits_per_sample=8``
220+ - ``"PCM_S"`` otherwise
221+
222+ ``"sph"`` format;
223+ - the default value is ``"PCM_S"``
224+
225+ bits_per_sample (int, optional): Changes the bit depth for the supported formats.
226+ When ``format`` is one of ``"wav"``, ``"flac"``, ``"sph"``, or ``"amb"``, you can change the
227+ bit depth. Valid values are ``8``, ``16``, ``32`` and ``64``.
228+
229+ Default Value;
230+ If not provided, the default values are picked based on ``format`` and ``encoding``;
231+
232+ ``"wav"``, ``"amb"``;
233+ - | If both ``encoding`` and ``bits_per_sample`` are not provided, the ``dtype`` of the
234+ | Tensor is used.
235+ - ``8`` if dtype is ``uint8``
236+ - ``16`` if dtype is ``int16``
237+ - ``32`` if dtype is ``int32`` or ``float32``
238+
239+ - ``8`` if ``encoding`` is ``"PCM_U"``, ``"ULAW"`` or ``"ALAW"``
240+ - ``16`` if ``encoding`` is ``"PCM_S"``
241+ - ``32`` if ``encoding`` is ``"PCM_F"``
242+
243+ ``"flac"`` format;
244+ - the default value is ``24``
245+
246+ ``"sph"`` format;
247+ - ``16`` if ``encoding`` is ``"PCM_U"``, ``"PCM_S"``, ``"PCM_F"`` or not provided.
248+ - ``8`` if ``encoding`` is ``"ULAW"`` or ``"ALAW"``
249+
250+ ``"amb"`` format;
251+ - ``8`` if ``encoding`` is ``"PCM_U"``, ``"ULAW"`` or ``"ALAW"``
252+ - ``16`` if ``encoding`` is ``"PCM_S"`` or not provided.
253+ - ``32`` if ``encoding`` is ``"PCM_F"``
254+
255+ Supported formats/encodings/bit depth/compression are;
256+
257+ ``"wav"``, ``"amb"``
258+ - 32-bit floating-point PCM
259+ - 32-bit signed integer PCM
260+ - 24-bit signed integer PCM
261+ - 16-bit signed integer PCM
262+ - 8-bit unsigned integer PCM
263+ - 8-bit mu-law
264+ - 8-bit a-law
265+
266+ Note: Default encoding/bit depth is determined by the dtype of the input Tensor.
267+
268+ ``"mp3"``
269+ Fixed bit rate (such as 128kbps) and variable bit rate compression.
270+ Default: VBR with high quality.
271+
272+ ``"flac"``
273+ - 8-bit
274+ - 16-bit
275+ - 24-bit (default)
276+
277+ ``"ogg"``, ``"vorbis"``
278+ - Different quality level. Default: approx. 112kbps
279+
280+ ``"sph"``
281+ - 8-bit signed integer PCM
282+ - 16-bit signed integer PCM
283+ - 24-bit signed integer PCM
284+ - 32-bit signed integer PCM (default)
285+ - 8-bit mu-law
286+ - 8-bit a-law
287+ - 16-bit a-law
288+ - 24-bit a-law
289+ - 32-bit a-law
290+
291+ ``"amr-nb"``
292+ Bitrate ranging from 4.75 kbit/s to 12.2 kbit/s. Default: 4.75 kbit/s
293+
294+ Note:
295+ To save into formats that ``libsox`` does not handle natively, (such as ``"mp3"``,
296+ ``"flac"``, ``"ogg"`` and ``"vorbis"``), your installation of ``torchaudio`` has
297+ to be linked to ``libsox`` and corresponding codec libraries such as ``libmad``
298+ or ``libmp3lame`` etc.
233299 """
234- if src .dtype == torch .float32 and dtype is None :
235- warnings .warn (
236- '`dtype` default value will be changed to `int16` in 0.9 release.'
237- 'Specify `dtype` to suppress this warning.'
238- )
239300 if not torch .jit .is_scripting ():
240- _save (filepath , src , sample_rate , channels_first , compression , format , dtype )
241- return
301+ if hasattr (filepath , 'write' ):
302+ torchaudio ._torchaudio .save_audio_fileobj (
303+ filepath , src , sample_rate , channels_first , compression ,
304+ format , encoding , bits_per_sample )
305+ return
306+ filepath = os .fspath (filepath )
242307 torch .ops .torchaudio .sox_io_save_audio_file (
243- filepath , src , sample_rate , channels_first , compression , format , dtype )
308+ filepath , src , sample_rate , channels_first , compression , format , encoding , bits_per_sample )
244309
245310
246311@_mod_utils .requires_module ('torchaudio._torchaudio' )
0 commit comments