
Commit a4576a7 (parent: 74135c8)

Use torchcodec in examples and integration tests too


41 files changed: +166, -104 lines

docs/source/index.rst

Lines changed: 2 additions & 2 deletions

@@ -182,7 +182,7 @@ Tutorials

.. customcarditem::
   :header: Loading waveform Tensors from files and saving them
-  :card_description: Learn how to query/load audio files and save waveform tensors to files, using <code>torchaudio.info</code>, <code>torchaudio.load</code> and <code>torchaudio.save</code> functions.
+  :card_description: Learn how to query/load audio files and save waveform tensors to files, using <code>torchaudio.info</code>, <code>torchaudio.utils.load_torchcodec</code> and <code>torchaudio.save</code> functions.
   :image: https://download.pytorch.org/torchaudio/tutorial-assets/thumbnails/audio_io_tutorial.png
   :link: tutorials/audio_io_tutorial.html
   :tags: I/O

@@ -399,7 +399,7 @@ In BibTeX format:
.. code-block:: bibtex

   @misc{hwang2023torchaudio,
-     title={TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch},
+     title={TorchAudio 2.1: Advancing speech recognition, self-supervised learning, and audio processing components for PyTorch},
      author={Jeff Hwang and Moto Hira and Caroline Chen and Xiaohui Zhang and Zhaoheng Ni and Guangzhi Sun and Pingchuan Ma and Ruizhe Huang and Vineel Pratap and Yuekai Zhang and Anurag Kumar and Chin-Yun Yu and Chuang Zhu and Chunxi Liu and Jacob Kahn and Mirco Ravanelli and Peng Sun and Shinji Watanabe and Yangyang Shi and Yumeng Tao and Robin Scheibler and Samuele Cornell and Sean Kim and Stavros Petridis},
      year={2023},
      eprint={2310.17864},
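
The doc change above mirrors the swap made throughout this commit: call sites that used torchaudio.load now call torchaudio.utils.load_torchcodec. As a rough illustration (not part of the diff), and assuming load_torchcodec returns the same (waveform, sample_rate) pair that torchaudio.load does, a migrated call site looks like this; the file paths are hypothetical:

# Sketch only; assumes load_torchcodec mirrors torchaudio.load's return value.
import torchaudio
from torchaudio.utils import load_torchcodec

path = "example.wav"                                 # hypothetical input file
info = torchaudio.info(path)                         # metadata query is unchanged
waveform, sample_rate = load_torchcodec(path)        # was: torchaudio.load(path)
torchaudio.save("copy.wav", waveform, sample_rate)   # saving is unchanged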

examples/asr/emformer_rnnt/mustc/dataset.py

Lines changed: 2 additions & 1 deletion

@@ -4,6 +4,7 @@
import torch
import torchaudio
import yaml
+from torchaudio.utils import load_torchcodec


FOLDER_IN_ARCHIVE = "en-de"

@@ -39,7 +40,7 @@ def __init__(

    def _get_mustc_item(self, idx):
        file_path, offset, duration = self.wav_list[idx]
-       waveform, sr = torchaudio.load(file_path, frame_offset=offset, num_frames=duration)
+       waveform, sr = load_torchcodec(file_path, frame_offset=offset, num_frames=duration)
        assert sr == SAMPLE_RATE
        transcript = self.trans_list[idx].replace("\n", "")
        return (waveform, transcript)
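
For reference, a minimal sketch of the segment-loading pattern used by this dataset, assuming load_torchcodec accepts the same frame_offset/num_frames keywords that appear in the call site above; the helper name and expected_sr parameter are illustrative:

# Sketch only; mirrors _get_mustc_item's loading step in isolation.
from torchaudio.utils import load_torchcodec

def load_segment(file_path, offset, duration, expected_sr):
    # Read `duration` frames starting `offset` frames into the file.
    waveform, sr = load_torchcodec(file_path, frame_offset=offset, num_frames=duration)
    assert sr == expected_sr  # the dataset checks against SAMPLE_RATE here
    return waveform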

examples/avsr/data_prep/data/data_module.py

Lines changed: 2 additions & 2 deletions

@@ -7,7 +7,7 @@
import torch
import torchaudio
import torchvision
-
+from torchaudio.utils import load_torchcodec

class AVSRDataLoader:
    def __init__(self, modality, detector="retinaface", resize=None):

@@ -39,7 +39,7 @@ def load_data(self, data_filename, transform=True):
        return video

    def load_audio(self, data_filename):
-       waveform, sample_rate = torchaudio.load(data_filename, normalize=True)
+       waveform, sample_rate = load_torchcodec(data_filename, normalize=True)
        return waveform, sample_rate

    def load_video(self, data_filename):

examples/avsr/lrs3.py

Lines changed: 2 additions & 1 deletion

@@ -3,6 +3,7 @@
import torchaudio
import torchvision
from torch.utils.data import Dataset
+from torchaudio.utils import load_torchcodec


def _load_list(args, *filenames):

@@ -31,7 +32,7 @@ def load_audio(path):
    """
    rtype: torch, T x 1
    """
-   waveform, sample_rate = torchaudio.load(path, normalize=True)
+   waveform, sample_rate = load_torchcodec(path, normalize=True)
    return waveform.transpose(1, 0)
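
Both AVSR loaders above pass normalize=True and, in lrs3.py, transpose the result to T x 1. A short sketch of the assumed shape convention (channel-first output from the loader, as with torchaudio.load); this is illustrative only, and the (1, T) mono shape is an assumption:

# Sketch only; assumes a (channels, time) float tensor when normalize=True.
from torchaudio.utils import load_torchcodec

def load_audio_t_by_1(path):
    waveform, sample_rate = load_torchcodec(path, normalize=True)  # assumed shape: (1, T)
    return waveform.transpose(1, 0)                                # shape: (T, 1)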

examples/dnn_beamformer/datamodule.py

Lines changed: 4 additions & 3 deletions

@@ -8,6 +8,7 @@
from torch import Tensor
from torch.utils.data import Dataset
from utils import CollateFnL3DAS22
+from torchaudio.utils import load_torchcodec

_PREFIX = "L3DAS22_Task1_"
_SUBSETS = {

@@ -46,10 +47,10 @@ def __getitem__(self, n: int) -> Tuple[Tensor, Tensor, int, str]:
        noisy_path_B = str(noisy_path_A).replace("_A.wav", "_B.wav")
        clean_path = noisy_path_A.parent.parent / "labels" / noisy_path_A.name.replace("_A.wav", ".wav")
        transcript_path = str(clean_path).replace("wav", "txt")
-       waveform_noisy_A, sample_rate1 = torchaudio.load(noisy_path_A)
-       waveform_noisy_B, sample_rate2 = torchaudio.load(noisy_path_B)
+       waveform_noisy_A, sample_rate1 = load_torchcodec(noisy_path_A)
+       waveform_noisy_B, sample_rate2 = load_torchcodec(noisy_path_B)
        waveform_noisy = torch.cat((waveform_noisy_A, waveform_noisy_B), dim=0)
-       waveform_clean, sample_rate3 = torchaudio.load(clean_path)
+       waveform_clean, sample_rate3 = load_torchcodec(clean_path)
        assert sample_rate1 == _SAMPLE_RATE and sample_rate2 == _SAMPLE_RATE and sample_rate3 == _SAMPLE_RATE
        with open(transcript_path, "r") as f:
            transcript = f.readline()
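
The L3DAS22 item above reads the paired "_A.wav"/"_B.wav" recordings and concatenates them along the channel dimension. A standalone sketch of that step; the function name and the expected_sr parameter are illustrative:

# Sketch only; expected_sr stands in for the dataset's _SAMPLE_RATE constant.
import torch
from torchaudio.utils import load_torchcodec

def load_noisy_pair(noisy_path_A, noisy_path_B, expected_sr):
    waveform_A, sr_A = load_torchcodec(noisy_path_A)
    waveform_B, sr_B = load_torchcodec(noisy_path_B)
    assert sr_A == expected_sr and sr_B == expected_sr
    # Stack the two recordings channel-wise into one multichannel tensor.
    return torch.cat((waveform_A, waveform_B), dim=0)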

examples/hubert/dataset/hubert_dataset.py

Lines changed: 4 additions & 1 deletion

@@ -12,6 +12,9 @@
from torch import Tensor
from torch.utils.data import BatchSampler, Dataset, DistributedSampler

+from torchaudio.utils import load_torchcodec
+
+
sys.path.append("..")
from utils import _get_label2id

@@ -299,7 +302,7 @@ def _load_audio(self, index: int) -> Tensor:
            (Tensor): The corresponding waveform Tensor.
        """
        wav_path = self.f_list[index]
-       waveform, sample_rate = torchaudio.load(wav_path)
+       waveform, sample_rate = load_torchcodec(wav_path)
        assert waveform.shape[1] == self.len_list[index]
        return waveform

examples/hubert/utils/feature_utils.py

Lines changed: 3 additions & 2 deletions

@@ -13,6 +13,7 @@
from torch.nn import Module

from .common_utils import _get_feat_lens_paths
+from torchaudio.utils import load_torchcodec

_LG = logging.getLogger(__name__)
_DEFAULT_DEVICE = torch.device("cpu")

@@ -53,7 +54,7 @@ def extract_feature_mfcc(
    Returns:
        Tensor: The desired feature tensor of the given audio file.
    """
-   waveform, sr = torchaudio.load(path)
+   waveform, sr = load_torchcodec(path)
    assert sr == sample_rate
    feature_extractor = torchaudio.transforms.MFCC(
        sample_rate=sample_rate, n_mfcc=13, melkwargs={"n_fft": 400, "hop_length": 160, "center": False}

@@ -88,7 +89,7 @@ def extract_feature_hubert(
    Returns:
        Tensor: The desired feature tensor of the given audio file.
    """
-   waveform, sr = torchaudio.load(path)
+   waveform, sr = load_torchcodec(path)
    assert sr == sample_rate
    waveform = waveform.to(device)
    with torch.inference_mode():
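
Only the loader changes in the two feature extractors above; the transforms themselves are untouched. A self-contained sketch of the MFCC path using the same MFCC settings that appear in the diff; the default sample_rate here is illustrative:

# Sketch only; reuses the MFCC parameters shown in extract_feature_mfcc.
import torchaudio
from torchaudio.utils import load_torchcodec

def extract_mfcc(path, sample_rate=16000):
    waveform, sr = load_torchcodec(path)
    assert sr == sample_rate
    feature_extractor = torchaudio.transforms.MFCC(
        sample_rate=sample_rate, n_mfcc=13, melkwargs={"n_fft": 400, "hop_length": 160, "center": False}
    )
    return feature_extractor(waveform)  # (channel, n_mfcc, time)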

examples/libtorchaudio/augmentation/create_jittable_pipeline.py

Lines changed: 3 additions & 3 deletions

@@ -7,7 +7,7 @@

import torch
import torchaudio
-
+from torchaudio.utils import load_torchcodec

class Pipeline(torch.nn.Module):
    """Example audio process pipeline.

@@ -17,15 +17,15 @@ class Pipeline(torch.nn.Module):

    def __init__(self, rir_path: str):
        super().__init__()
-       rir, sample_rate = torchaudio.load(rir_path)
+       rir, sample_rate = load_torchcodec(rir_path)
        self.register_buffer("rir", rir)
        self.rir_sample_rate: int = sample_rate

    def forward(self, input_path: str, output_path: str):
        torchaudio.sox_effects.init_sox_effects()

        # 1. load audio
-       waveform, sample_rate = torchaudio.load(input_path)
+       waveform, sample_rate = load_torchcodec(input_path)

        # 2. Add background noise
        alpha = 0.01
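
The pipeline above loads the RIR once in __init__ and stores it with register_buffer, so the tensor follows the module across devices and is included in its state dict. A stripped-down sketch of just that pattern, independent of the rest of the pipeline; whether load_torchcodec itself is TorchScript-scriptable is not established here, which is why loading happens eagerly in __init__:

# Sketch only; isolates the RIR-as-buffer pattern from the example pipeline.
import torch
from torchaudio.utils import load_torchcodec

class RIRHolder(torch.nn.Module):
    def __init__(self, rir_path: str):
        super().__init__()
        rir, sample_rate = load_torchcodec(rir_path)
        self.register_buffer("rir", rir)          # non-trainable, device-tracked tensor
        self.rir_sample_rate: int = sample_rate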

examples/libtorchaudio/speech_recognition/build_pipeline_from_fairseq.py

Lines changed: 2 additions & 1 deletion

@@ -14,6 +14,7 @@
from greedy_decoder import Decoder
from torch.utils.mobile_optimizer import optimize_for_mobile
from torchaudio.models.wav2vec2.utils.import_fairseq import import_fairseq_model
+from torchaudio.utils import load_torchcodec

TORCH_VERSION: Tuple[int, ...] = tuple(int(x) for x in torch.__version__.split(".")[:2])
if TORCH_VERSION >= (1, 10):

@@ -58,7 +59,7 @@ def _parse_args():

class Loader(torch.nn.Module):
    def forward(self, audio_path: str) -> torch.Tensor:
-       waveform, sample_rate = torchaudio.load(audio_path)
+       waveform, sample_rate = load_torchcodec(audio_path)
        if sample_rate != 16000:
            waveform = torchaudio.functional.resample(waveform, float(sample_rate), 16000.0)
        return waveform
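
The Loader above forces every input to 16 kHz before it reaches the wav2vec2 model; the 16000 target and the resample call come straight from the diff. The same guard as a plain function, for illustration only:

# Sketch only; the function name is illustrative.
import torch
import torchaudio
from torchaudio.utils import load_torchcodec

def load_16k(audio_path: str) -> torch.Tensor:
    waveform, sample_rate = load_torchcodec(audio_path)
    if sample_rate != 16000:
        waveform = torchaudio.functional.resample(waveform, float(sample_rate), 16000.0)
    return waveform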

examples/libtorchaudio/speech_recognition/build_pipeline_from_huggingface_transformers.py

Lines changed: 2 additions & 1 deletion

@@ -8,6 +8,7 @@
import torchaudio
from greedy_decoder import Decoder
from torchaudio.models.wav2vec2.utils.import_huggingface import import_huggingface_model
+from torchaudio.utils import load_torchcodec

TORCH_VERSION: Tuple[int, ...] = tuple(int(x) for x in torch.__version__.split(".")[:2])
if TORCH_VERSION >= (1, 10):

@@ -49,7 +50,7 @@ def _parse_args():

class Loader(torch.nn.Module):
    def forward(self, audio_path: str) -> torch.Tensor:
-       waveform, sample_rate = torchaudio.load(audio_path)
+       waveform, sample_rate = load_torchcodec(audio_path)
        if sample_rate != 16000:
            waveform = torchaudio.functional.resample(waveform, float(sample_rate), 16000.0)
        return waveform
