Add save_with_torchcodec, modify save()'s warnings (#3975)

NicolasHug · web-flow · commit 00b0c91db92c · 2025-07-15T17:09:37.000+01:00
diff --git a/docs/source/torchaudio.rst b/docs/source/torchaudio.rst
@@ -9,9 +9,11 @@ torchaudio
 
     - Most APIs listed below are deprecated in 2.8 and will be removed in 2.9.
     - The decoding and encoding capabilities of PyTorch for both audio and video
-      are being consolidated into TorchCodec. We provide
-      ``torchaudio.load_with_torchcodec()`` as a replacement for
-      ``torchaudio.load()``.
+      are being consolidated into TorchCodec. For convenience, we provide
+      :func:`~torchaudio.load_with_torchcodec` as a replacement for
+      :func:`~torchaudio.load` and :func:`~torchaudio.save_with_torchcodec` as a
+      replacement for :func:`~torchaudio.save`, but we recommend that you port
+      your code to native torchcodec APIs.
 
     Please see https://github.com/pytorch/audio/issues/3902 for more information.
 
@@ -30,6 +32,7 @@ it easy to handle audio data.
    load
    load_with_torchcodec
    save
+   save_with_torchcodec
    list_audio_backends
 
 .. _backend:
diff --git a/src/torchaudio/__init__.py b/src/torchaudio/__init__.py
@@ -8,16 +8,15 @@
     info as _info,
     list_audio_backends as _list_audio_backends,
     load,
-    save as _save,
+    save,
     set_audio_backend as _set_audio_backend,
 )
-from ._torchcodec import load_with_torchcodec
+from ._torchcodec import load_with_torchcodec, save_with_torchcodec
 
 AudioMetaData = dropping_class_io_support(_AudioMetaData)
 get_audio_backend = dropping_io_support(_get_audio_backend)
 info = dropping_io_support(_info)
 list_audio_backends = dropping_io_support(_list_audio_backends)
-save = dropping_io_support(_save)
 set_audio_backend = dropping_io_support(_set_audio_backend)
 
 from . import (  # noqa: F401
@@ -46,6 +45,7 @@
     "AudioMetaData",
     "load",
     "load_with_torchcodec",
+    "save_with_torchcodec",
     "info",
     "save",
     "io",
diff --git a/src/torchaudio/_backend/utils.py b/src/torchaudio/_backend/utils.py
@@ -252,6 +252,14 @@ def save(
     ):
         """Save audio data to file.
 
+        .. warning::
+            In 2.9, this function's implementation will be changed to use
+            :func:`~torchaudio.save_with_torchcodec` under the hood. Some
+            parameters like ``format``, ``encoding``, ``bits_per_sample``,
+            ``buffer_size``, and ``backend`` will be ignored. We recommend that
+            you port your code to rely directly on TorchCodec's encoder instead:
+            https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.encoders.AudioEncoder
+
         Note:
             The formats this function can handle depend on the availability of backends.
             Please use the following functions to fetch the supported formats.
@@ -326,6 +334,14 @@ def save(
                 Refer to http://sox.sourceforge.net/soxformat.html for more details.
 
         """
+        warnings.warn(
+            "In 2.9, this function's implementation will be changed to use "
+            "torchaudio.save_with_torchcodec` under the hood. Some "
+            "parameters like format, encoding, bits_per_sample, buffer_size, and "
+            "``backend`` will be ignored. We recommend that you port your code to "
+            "rely directly on TorchCodec's encoder instead: "
+            "https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.encoders.AudioEncoder"
+        )
         backend = dispatcher(uri, format, backend)
         return backend.save(
             uri, src, sample_rate, channels_first, format, encoding, bits_per_sample, buffer_size, compression
diff --git a/src/torchaudio/_torchcodec.py b/src/torchaudio/_torchcodec.py
@@ -20,15 +20,16 @@ def load_with_torchcodec(
     
     .. note::
         
-        This function supports the same API as ``torchaudio.load()``, and relies
-        on TorchCodec's decoding capabilities under the hood. It is provided for
-        convenience, but we do recommend that you port your code to natively use
-        ``torchcodec``'s ``AudioDecoder`` class for better performance:
+        This function supports the same API as :func:`~torchaudio.load`, and
+        relies on TorchCodec's decoding capabilities under the hood. It is
+        provided for convenience, but we do recommend that you port your code to
+        natively use ``torchcodec``'s ``AudioDecoder`` class for better
+        performance:
         https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.decoders.AudioDecoder.
-        In TorchAudio 2.9, ``torchaudio.load()`` will be relying on
-        ``load_with_torchcodec``. Note that some parameters of
-        ``torchaudio.load()``, like ``normalize``, ``buffer_size``, and
-        ``backend``, are ignored by ``load_with_torchcodec``.
+        In TorchAudio 2.9, :func:`~torchaudio.load` will be relying on
+        :func:`~torchaudio.load_with_torchcodec`. Note that some parameters of
+        :func:`~torchaudio.load`, like ``normalize``, ``buffer_size``, and
+        ``backend``, are ignored by :func:`~torchaudio.load_with_torchcodec`.
     
     
     Args:
@@ -158,4 +159,194 @@ def load_with_torchcodec(
     if not channels_first:
         data = data.transpose(0, 1)  # [channel, time] -> [time, channel]
     
-    return data, sample_rate
+    return data, sample_rate
+
+
+def save_with_torchcodec(
+    uri: Union[str, os.PathLike],
+    src: torch.Tensor,
+    sample_rate: int,
+    channels_first: bool = True,
+    format: Optional[str] = None,
+    encoding: Optional[str] = None,
+    bits_per_sample: Optional[int] = None,
+    buffer_size: int = 4096,
+    backend: Optional[str] = None,
+    compression: Optional[Union[float, int]] = None,
+) -> None:
+    """Save audio data to file using TorchCodec's AudioEncoder.
+
+    .. note::
+        
+        This function supports the same API as :func:`~torchaudio.save`, and
+        relies on TorchCodec's encoding capabilities under the hood. It is
+        provided for convenience, but we do recommend that you port your code to
+        natively use ``torchcodec``'s ``AudioEncoder`` class for better
+        performance:
+        https://docs.pytorch.org/torchcodec/stable/generated/torchcodec.encoders.AudioEncoder.
+        In TorchAudio 2.9, :func:`~torchaudio.save` will be relying on
+        :func:`~torchaudio.save_with_torchcodec`. Note that some parameters of
+        :func:`~torchaudio.save`, like ``format``, ``encoding``,
+        ``bits_per_sample``, ``buffer_size``, and ``backend``, are ignored by
+        :func:`~torchaudio.save_with_torchcodec`.
+    
+    This function provides a TorchCodec-based alternative to torchaudio.save
+    with the same API. TorchCodec's AudioEncoder provides efficient encoding
+    with FFmpeg under the hood.
+    
+    Args:
+        uri (path-like object):
+            Path to save the audio file. The file extension determines the format.
+            
+        src (torch.Tensor):
+            Audio data to save. Must be a 1D or 2D tensor with float32 values
+            in the range [-1, 1]. If 2D, shape should be [channel, time] when
+            channels_first=True, or [time, channel] when channels_first=False.
+            
+        sample_rate (int):
+            Sample rate of the audio data.
+            
+        channels_first (bool, optional):
+            Indicates whether the input tensor has channels as the first dimension.
+            If True, expects [channel, time]. If False, expects [time, channel].
+            Default: True.
+            
+        format (str or None, optional):
+            Audio format hint. Not used by TorchCodec (format is determined by
+            file extension). A warning is issued if provided.
+            Default: None.
+            
+        encoding (str or None, optional):
+            Audio encoding. Not fully supported by TorchCodec AudioEncoder.
+            A warning is issued if provided. Default: None.
+            
+        bits_per_sample (int or None, optional):
+            Bits per sample. Not directly supported by TorchCodec AudioEncoder.
+            A warning is issued if provided. Default: None.
+            
+        buffer_size (int, optional):
+            Not used by TorchCodec AudioEncoder. Provided for API compatibility.
+            A warning is issued if a non-default value is passed. Default: 4096.
+            
+        backend (str or None, optional):
+            Not used by TorchCodec AudioEncoder. Provided for API compatibility.
+            A warning is issued if provided. Default: None.
+            
+        compression (float, int or None, optional):
+            Compression level or bit rate. Maps to bit_rate parameter in
+            TorchCodec AudioEncoder. Default: None.
+            
+    Raises:
+        ImportError: If torchcodec is not available.
+        ValueError: If input parameters are invalid.
+        RuntimeError: If TorchCodec fails to encode the audio.
+        
+    Note:
+        - TorchCodec AudioEncoder expects float32 samples in [-1, 1] range.
+        - Some parameters (format, encoding, bits_per_sample, buffer_size, backend)
+          are not used by TorchCodec but are provided for API compatibility.
+        - The output format is determined by the file extension in the uri.
+        - TorchCodec uses FFmpeg under the hood for encoding.
+    """
+    # Import torchcodec here to provide clear error if not available
+    try:
+        from torchcodec.encoders import AudioEncoder
+    except ImportError as e:
+        raise ImportError(
+            "TorchCodec is required for save_with_torchcodec. "
+            "Please install torchcodec to use this function."
+        ) from e
+    
+    # Parameter validation and warnings
+    if format is not None:
+        import warnings
+        warnings.warn(
+            "The 'format' parameter is not used by TorchCodec AudioEncoder. "
+            "Format is determined by the file extension.",
+            UserWarning,
+            stacklevel=2
+        )
+    
+    if encoding is not None:
+        import warnings
+        warnings.warn(
+            "The 'encoding' parameter is not fully supported by TorchCodec AudioEncoder.",
+            UserWarning,
+            stacklevel=2
+        )
+    
+    if bits_per_sample is not None:
+        import warnings
+        warnings.warn(
+            "The 'bits_per_sample' parameter is not directly supported by TorchCodec AudioEncoder.",
+            UserWarning,
+            stacklevel=2
+        )
+    
+    if buffer_size != 4096:
+        import warnings
+        warnings.warn(
+            "The 'buffer_size' parameter is not used by TorchCodec AudioEncoder.",
+            UserWarning,
+            stacklevel=2
+        )
+        
+    if backend is not None:
+        import warnings
+        warnings.warn(
+            "The 'backend' parameter is not used by TorchCodec AudioEncoder.",
+            UserWarning,
+            stacklevel=2
+        )
+    
+    # Input validation
+    if not isinstance(src, torch.Tensor):
+        raise ValueError(f"Expected src to be a torch.Tensor, got {type(src)}")
+    
+    if src.dtype != torch.float32:
+        src = src.float()
+    
+    if sample_rate <= 0:
+        raise ValueError(f"sample_rate must be positive, got {sample_rate}")
+    
+    # Handle tensor shape and channels_first
+    if src.ndim == 1:
+        # Convert to 2D: [1, time] for channels_first=True
+        if channels_first:
+            data = src.unsqueeze(0)  # [1, time]
+        else:
+            # For channels_first=False, input is [time] -> reshape to [time, 1] -> transpose to [1, time]
+            data = src.unsqueeze(1).transpose(0, 1)  # [time, 1] -> [1, time]
+    elif src.ndim == 2:
+        if channels_first:
+            data = src  # Already [channel, time]
+        else:
+            data = src.transpose(0, 1)  # [time, channel] -> [channel, time]
+    else:
+        raise ValueError(f"Expected 1D or 2D tensor, got {src.ndim}D tensor")
+    
+    # Create AudioEncoder
+    try:
+        encoder = AudioEncoder(data, sample_rate=sample_rate)
+    except Exception as e:
+        raise RuntimeError(f"Failed to create AudioEncoder: {e}") from e
+    
+    # Determine bit_rate from compression parameter
+    bit_rate = None
+    if compression is not None:
+        if isinstance(compression, (int, float)):
+            bit_rate = int(compression)
+        else:
+            import warnings
+            warnings.warn(
+                f"Unsupported compression type {type(compression)}. "
+                "TorchCodec AudioEncoder expects int or float for bit_rate.",
+                UserWarning,
+                stacklevel=2
+            )
+    
+    # Save to file
+    try:
+        encoder.to_file(uri, bit_rate=bit_rate)
+    except Exception as e:
+        raise RuntimeError(f"Failed to save audio to {uri}: {e}") from e
diff --git a/test/torchaudio_unittest/test_load_save_torchcodec.py b/test/torchaudio_unittest/test_load_save_torchcodec.py
diff --git a/test/torchaudio_unittest/test_load_torchcodec.py b/test/torchaudio_unittest/test_load_torchcodec.py