Add tuto

NicolasHug · NicolasHug · commit ee050589c949 · 2025-06-09T13:32:58.000+01:00
diff --git a/examples/decoding/README.rst b/examples/decoding/README.rst
@@ -1,3 +1,2 @@
 Decoding
 --------
-
diff --git a/examples/encoding/audio_encoding.py b/examples/encoding/audio_encoding.py
@@ -14,4 +14,78 @@
 """
 
 # %%
-print("hello")
+# Let's first generate some samples to be encoded. The data to be encoded could
+# also just come from an :class:`~torchcodec.decoders.AudioDecoder`!
+import torch
+from IPython.display import Audio as play_audio
+
+
+def make_sinewave() -> tuple[torch.Tensor, int]:
+    freq_A = 440  # Hz
+    sample_rate = 16000  # Hz
+    duration_seconds = 3  # seconds
+    t = torch.linspace(0, duration_seconds, int(sample_rate * duration_seconds), dtype=torch.float32)
+    return torch.sin(2 * torch.pi * freq_A * t), sample_rate
+
+
+samples, sample_rate = make_sinewave()
+
+print(f"Encoding samples with {samples.shape = } and {sample_rate = }")
+play_audio(samples, rate=sample_rate)
+
+# %%
+# We first instantiate an :class:`~torchcodec.encoders.AudioEncoder`. We pass it
+# the samples to be encoded. The samples must be a 2D tensor of shape
+# ``(num_channels, num_samples)``, or in this case, a 1D tensor where
+# ``num_channels`` is assumed to be 1. The values must be float values
+# normalized in ``[-1, 1]``: this is also what the
+# :class:`~torchcodec.decoders.AudioDecoder` would return.
+#
+# .. note::
+#
+#     The ``sample_rate`` parameter corresponds to the sample rate of the
+#     *input*, not the desired encoded sample rate.
+from torchcodec.encoders import AudioEncoder
+
+encoder = AudioEncoder(samples=samples, sample_rate=sample_rate)
+
+
+# %%
+# :class:`~torchcodec.encoders.AudioEncoder` supports encoding samples into a
+# file via the :meth:`~torchcodec.encoders.AudioEncoder.to_file` method, or to
+# raw bytes via :meth:`~torchcodec.encoders.AudioEncoder.to_tensor`.  For the
+# purpose of this tutorial we'll use
+# :meth:`~torchcodec.encoders.AudioEncoder.to_tensor`, so that we can easily
+# re-decode the encoded samples and check their properties. The
+# :meth:`~torchcodec.encoders.AudioEncoder.to_file` method works very similarly.
+
+encoded_samples = encoder.to_tensor(format="mp3")
+print(f"{encoded_samples.shape = }, {encoded_samples.dtype = }")
+
+
+# %%
+# That's it!
+#
+# Now that we have our encoded data, we can decode it back, to make sure it
+# looks and sounds as expected:
+from torchcodec.decoders import AudioDecoder
+
+samples_back = AudioDecoder(encoded_samples).get_all_samples()
+
+print(samples_back)
+play_audio(samples_back.data, rate=samples_back.sample_rate)
+
+# %%
+# The encoder supports some encoding options that allow you to change how the
+# data is encoded. For example, we can decide to encode our mono data (1
+# channel) into stereo data (2 channels):
+encoded_samples = encoder.to_tensor(format="wav", num_channels=2)
+
+stereo_samples_back = AudioDecoder(encoded_samples).get_all_samples()
+
+print(stereo_samples_back)
+play_audio(stereo_samples_back.data, rate=stereo_samples_back.sample_rate)
+
+# %%
+# Check the docstring of the encoding methods to learn about the different
+# encoding options.
diff --git a/src/torchcodec/encoders/_audio_encoder.py b/src/torchcodec/encoders/_audio_encoder.py
@@ -12,8 +12,10 @@ class AudioEncoder:
 
     Args:
         samples (``torch.Tensor``): The samples to encode. This must be a 2D
-            tensor of shape ``(num_channels, num_samples)``
-        sample_rate (int): The sample rate of the **input** ``samples``.    
+            tensor of shape ``(num_channels, num_samples)``, or a 1D tensor in
+            which case ``num_channels = 1`` is assumed. Values must be float
+            values in ``[-1, 1]``.
+        sample_rate (int): The sample rate of the **input** ``samples``.
     """
 
     def __init__(self, samples: Tensor, *, sample_rate: int):
@@ -24,8 +26,11 @@ def __init__(self, samples: Tensor, *, sample_rate: int):
             raise ValueError(
                 f"Expected samples to be a Tensor, got {type(samples) = }."
             )
+        if samples.ndim == 1:
+            # make it 2D and assume 1 channel
+            samples = samples[None, :]
         if samples.ndim != 2:
-            raise ValueError(f"Expected 2D samples, got {samples.shape = }.")
+            raise ValueError(f"Expected 1D or 2D samples, got {samples.shape = }.")
         if samples.dtype != torch.float32:
             raise ValueError(f"Expected float32 samples, got {samples.dtype = }.")
         if sample_rate <= 0:
diff --git a/test/test_encoders.py b/test/test_encoders.py
@@ -26,8 +26,8 @@ def decode(self, source) -> torch.Tensor:
     def test_bad_input(self):
         with pytest.raises(ValueError, match="Expected samples to be a Tensor"):
             AudioEncoder(samples=123, sample_rate=32_000)
-        with pytest.raises(ValueError, match="Expected 2D samples"):
-            AudioEncoder(samples=torch.rand(10), sample_rate=32_000)
+        with pytest.raises(ValueError, match="Expected 1D or 2D samples"):
+            AudioEncoder(samples=torch.rand(3, 4, 5), sample_rate=32_000)
         with pytest.raises(ValueError, match="Expected float32 samples"):
             AudioEncoder(
                 samples=torch.rand(10, 10, dtype=torch.float64), sample_rate=32_000