|
14 | 14 | """ |
15 | 15 |
|
16 | 16 | # %% |
17 | | -print("hello") |
| 17 | +# Let's first generate some samples to be encoded. The data to be encoded could |
| 18 | +# also just come from an :class:`~torchcodec.decoders.AudioDecoder`! |
| 19 | +import torch |
| 20 | +from IPython.display import Audio as play_audio |
| 21 | + |
| 22 | + |
| 23 | +def make_sinewave() -> tuple[torch.Tensor, int]: |
| 24 | + freq_A = 440 # Hz |
| 25 | + sample_rate = 16000 # Hz |
| 26 | + duration_seconds = 3 # seconds |
| 27 | + t = torch.linspace(0, duration_seconds, int(sample_rate * duration_seconds), dtype=torch.float32) |
| 28 | + return torch.sin(2 * torch.pi * freq_A * t), sample_rate |
| 29 | + |
| 30 | + |
| 31 | +samples, sample_rate = make_sinewave() |
| 32 | + |
| 33 | +print(f"Encoding samples with {samples.shape = } and {sample_rate = }") |
| 34 | +play_audio(samples, rate=sample_rate) |
| 35 | + |
| 36 | +# %% |
| 37 | +# We first instantiate an :class:`~torchcodec.encoders.AudioEncoder`. We pass it |
| 38 | +# the samples to be encoded. The samples must be a 2D tensor of shape
| 39 | +# ``(num_channels, num_samples)``, or in this case, a 1D tensor where |
| 40 | +# ``num_channels`` is assumed to be 1. The values must be float values |
| 41 | +# normalized in ``[-1, 1]``: this is also what the |
| 42 | +# :class:`~torchcodec.decoders.AudioDecoder` would return. |
| 43 | +# |
| 44 | +# .. note:: |
| 45 | +# |
| 46 | +# The ``sample_rate`` parameter corresponds to the sample rate of the |
| 47 | +# *input*, not the desired encoded sample rate. |
| 48 | +from torchcodec.encoders import AudioEncoder |
| 49 | + |
| 50 | +encoder = AudioEncoder(samples=samples, sample_rate=sample_rate) |
| 51 | + |
| 52 | + |
| 53 | +# %% |
| 54 | +# :class:`~torchcodec.encoders.AudioEncoder` supports encoding samples into a |
| 55 | +# file via the :meth:`~torchcodec.encoders.AudioEncoder.to_file` method, or to |
| 56 | +# raw bytes via :meth:`~torchcodec.encoders.AudioEncoder.to_tensor`. For the |
| 57 | +# purpose of this tutorial we'll use |
| 58 | +# :meth:`~torchcodec.encoders.AudioEncoder.to_tensor`, so that we can easily |
| 59 | +# re-decode the encoded samples and check their properties. The
| 60 | +# :meth:`~torchcodec.encoders.AudioEncoder.to_file` method works very similarly. |
| 61 | + |
| 62 | +encoded_samples = encoder.to_tensor(format="mp3") |
| 63 | +print(f"{encoded_samples.shape = }, {encoded_samples.dtype = }") |
| 64 | + |
| 65 | + |
| 66 | +# %% |
| 67 | +# That's it! |
| 68 | +# |
| 69 | +# Now that we have our encoded data, we can decode it back, to make sure it |
| 70 | +# looks and sounds as expected: |
| 71 | +from torchcodec.decoders import AudioDecoder |
| 72 | + |
| 73 | +samples_back = AudioDecoder(encoded_samples).get_all_samples() |
| 74 | + |
| 75 | +print(samples_back) |
| 76 | +play_audio(samples_back.data, rate=samples_back.sample_rate) |
| 77 | + |
| 78 | +# %% |
| 79 | +# The encoder supports some encoding options that allow you to change how the
| 80 | +# data is encoded. For example, we can decide to encode our mono data (1 |
| 81 | +# channel) into stereo data (2 channels): |
| 82 | +encoded_samples = encoder.to_tensor(format="wav", num_channels=2) |
| 83 | + |
| 84 | +stereo_samples_back = AudioDecoder(encoded_samples).get_all_samples() |
| 85 | + |
| 86 | +print(stereo_samples_back) |
| 87 | +play_audio(stereo_samples_back.data, rate=stereo_samples_back.sample_rate) |
| 88 | + |
| 89 | +# %% |
| 90 | +# Check the docstring of the encoding methods to learn about the different |
| 91 | +# encoding options. |
0 commit comments