pytorch
diff --git a/.github/scripts/unittest-linux/install.sh b/.github/scripts/unittest-linux/install.sh
Lines changed: 2 additions & 54 deletions
diff --git a/.github/scripts/unittest-linux/run_test.sh b/.github/scripts/unittest-linux/run_test.sh
Lines changed: 5 additions & 1 deletion
diff --git a/.github/scripts/unittest-windows/environment.yml b/.github/scripts/unittest-windows/environment.yml
Lines changed: 0 additions & 1 deletion
diff --git a/.github/scripts/unittest-windows/install.sh b/.github/scripts/unittest-windows/install.sh
Lines changed: 1 addition & 3 deletions
diff --git a/.github/workflows/unittest-linux-gpu.yml b/.github/workflows/unittest-linux-gpu.yml
Lines changed: 2 additions & 33 deletions
diff --git a/docs/requirements-tutorials.txt b/docs/requirements-tutorials.txt
Lines changed: 0 additions & 10 deletions
diff --git a/docs/source/conf.py b/docs/source/conf.py
Lines changed: 5 additions & 1 deletion
diff --git a/examples/hubert/utils/kmeans.py b/examples/hubert/utils/kmeans.py
Lines changed: 1 addition & 1 deletion
diff --git a/examples/tutorials/audio_feature_extractions_tutorial.py b/examples/tutorials/audio_feature_extractions_tutorial.py
Lines changed: 2 additions & 2 deletions
diff --git a/examples/tutorials/tacotron2_pipeline_tutorial.py b/examples/tutorials/tacotron2_pipeline_tutorial.py
Lines changed: 4 additions & 57 deletions
@@ -24,41 +24,7 @@ esac
 conda create -n ci -y python="${PYTHON_VERSION}"
 conda activate ci
 
-# 1. Install PyTorch
-# if [ -z "${CUDA_VERSION:-}" ] ; then
-#     if [ "${os}" == MacOSX ] ; then
-#         cudatoolkit=''
-#     else
-#         cudatoolkit="cpuonly"
-#     fi
-#     version="cpu"
-# else
-#     version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")"
-#     export CUDATOOLKIT_CHANNEL="nvidia"
-#     cudatoolkit="pytorch-cuda=${version}"
-# fi
-
-# printf "Installing PyTorch with %s\n" "${cudatoolkit}"
-# (
-#     if [ "${os}" == MacOSX ] ; then
-#       # TODO: this can be removed as soon as linking issue could be resolved
-#       #  see https://github.com/pytorch/pytorch/issues/62424 from details
-#       MKL_CONSTRAINT='mkl==2021.2.0'
-#       pytorch_build=pytorch
-#     else
-#       MKL_CONSTRAINT=''
-#       pytorch_build="pytorch[build="*${version}*"]"
-#     fi
-#     set -x
-
-#     if [[ -z "$cudatoolkit" ]]; then
-#         conda install ${CONDA_CHANNEL_FLAGS:-} -y -c "pytorch-${UPLOAD_CHANNEL}" $MKL_CONSTRAINT "pytorch-${UPLOAD_CHANNEL}::${pytorch_build}"
-#     else
-#         conda install pytorch ${cudatoolkit} ${CONDA_CHANNEL_FLAGS:-} -y -c "pytorch-${UPLOAD_CHANNEL}" -c nvidia  $MKL_CONSTRAINT
-#     fi
-# )
-
-export GPU_ARCH_TYPE="cpu"  # TODO change this
+export GPU_ARCH_TYPE="cpu"
 
 case $GPU_ARCH_TYPE in
   cpu)
@@ -90,22 +56,4 @@ printf "* Installing test tools\n"
 conda install -y "ffmpeg<5"
 python -c "import torch; import torchaudio; import torchcodec; print(torch.__version__, torchaudio.__version__, torchcodec.__version__)"
 
-NUMBA_DEV_CHANNEL=""
-if [[ "$(python --version)" = *3.9* || "$(python --version)" = *3.10* ]]; then
-    # Numba isn't available for Python 3.9 and 3.10 except on the numba dev channel and building from source fails
-    # See https://github.com/librosa/librosa/issues/1270#issuecomment-759065048
-    NUMBA_DEV_CHANNEL="-c numba/label/dev"
-fi
-(
-    set -x
-    conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} sox libvorbis parameterized 'requests>=2.20'
-    pip install kaldi-io SoundFile librosa coverage pytest pytest-cov scipy expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag pyroomacoustics flashlight-text git+https://github.com/kpu/kenlm
-
-    # TODO: might be better to fix the single call to `pip install` above
-    pip install "pillow<10.0" "scipy<1.10" "numpy<2.0"
-)
-# Install fairseq
-git clone https://github.com/pytorch/fairseq
-cd fairseq
-git checkout e47a4c8
-pip install .
+pip3 install parameterized requests coverage pytest pytest-cov scipy numpy expecttest
@@ -29,6 +29,10 @@ fi
 )
 
 (
+    export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CTC_DECODER=true
+    export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_unidecode=true
+    export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_inflect=true
+    export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_pytorch_lightning=true
     cd test
-    pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs and not (torchscript and rnnt) and not torchscript_consistency"
+    pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not ffmpeg and not fairseq and not hdemucs and not (torchscript and rnnt) and not torchscript_consistency"
 )
@@ -8,7 +8,6 @@ dependencies:
   - scipy >= 1.4.1
   - pip
   - pip:
-    - kaldi-io
     - PySoundFile
     - future
     - parameterized
 
@@ -64,9 +64,8 @@ case "$(python --version)" in
         NUMBA_DEV_CHANNEL="-c numba/label/dev"
         ;;
 esac
-# Note: installing librosa via pip fail because it will try to compile numba.
 (
-    conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} 'librosa==0.10.0' parameterized 'requests>=2.20'
+    conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} parameterized 'requests>=2.20'
     # Need to disable shell check since this'll fail out if SENTENCEPIECE_DEPENDENCY is empty
     # shellcheck disable=SC2086
     pip install \
@@ -76,7 +75,6 @@ esac
         coverage \
         expecttest \
         inflect \
-        kaldi-io \
         pytest \
         pytest-cov \
         pytorch-lightning \
 
@@ -34,17 +34,13 @@ jobs:
         export PYTHON_VERSION="${{ matrix.python_version }}"
         export CU_VERSION="${{ matrix.cuda_arch_version }}"
         export CUDATOOLKIT="pytorch-cuda=${CU_VERSION}"
-        export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_APPLY_CMVN_SLIDING=true
-        export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_FBANK_FEATS=true
-        export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_KALDI_PITCH_FEATS=true
-        export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_MFCC_FEATS=true
-        export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS=true
         export TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY=true
         export TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310=true
         export TORCHAUDIO_TEST_ALLOW_SKIP_IF_TEMPORARY_DISABLED=true
         export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX_DECODER=true
         export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX_ENCODER=true
         export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_FFMPEG=true
+        export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_demucs=true
         # Avoid reproducibility errors with CUBLAS: https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility
         export CUBLAS_WORKSPACE_CONFIG=:4096:8
 
@@ -78,35 +74,8 @@ jobs:
 
         echo "::endgroup::"
         echo "::group::Install other Dependencies"
-        # conda install \
-        #   --quiet --yes \
-        #   -c conda-forge \
-        #   -c numba/label/dev \
-        #   sox libvorbis 'librosa==0.10.0' parameterized 'requests>=2.20'
-        # pip3 install --progress-bar off \
-        #   kaldi-io \
-        #   SoundFile \
-        #   coverage \
-        #   pytest \
-        #   pytest-cov \
-        #   scipy \
-        #   transformers \
-        #   expecttest \
-        #   unidecode \
-        #   inflect \
-        #   Pillow \
-        #   sentencepiece \
-        #   pytorch-lightning \
-        #   'protobuf<4.21.0' \
-        #   demucs \
-        #   tinytag \
-        #   flashlight-text \
-        #   git+https://github.com/kpu/kenlm/ \
-        #   git+https://github.com/pytorch/fairseq.git@e47a4c8
         
-        pip3 install parameterized requests
-        pip3 install kaldi-io SoundFile librosa coverage pytest pytest-cov scipy expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag 
-        pip3 install "pillow<10.0" "scipy<1.10" "numpy<2.0"
+        pip3 install parameterized requests coverage pytest pytest-cov scipy numpy expecttest 
 
         echo "::endgroup::"
         echo "::group::Run tests"
 
@@ -1,11 +1 @@
 IPython
-deep-phonemizer
-boto3
-cython
-pandas
-librosa==0.10.0
-sentencepiece
-pandoc
-mir_eval
-pesq
-pystoi
@@ -121,9 +121,13 @@ def _get_pattern():
         }
 
     ret = {"filename_pattern": "tutorial.py"}
+    no_build = r"/examples/tutorials/asr_inference_with_cuda_ctc_decoder_tutorial.py"
     if os.getenv("GALLERY_PATTERN"):
         # See https://github.com/pytorch/tutorials/blob/cbf2238df0e78d84c15bd94288966d2f4b2e83ae/conf.py#L75-L83
-        ret["ignore_pattern"] = r"/(?!" + re.escape(os.getenv("GALLERY_PATTERN")) + r")[^/]+$"
+        ret["ignore_pattern"] = r"(/(?!" + re.escape(os.getenv("GALLERY_PATTERN")) + r")[^/]+$)"
+        ret["ignore_pattern"] += "|(" + no_build + ")"
+    else:
+        ret["ignore_pattern"] = no_build
     return ret
 
 
 
@@ -8,7 +8,6 @@
 from typing import Tuple
 
 import torch
-from sklearn.cluster import MiniBatchKMeans
 from torch import Tensor
 
 from .common_utils import _get_feat_lens_paths, _get_model_path
@@ -102,6 +101,7 @@ def learn_kmeans(
     """
     if not km_dir.exists():
         km_dir.mkdir()
+    from sklearn.cluster import MiniBatchKMeans
 
     km_model = MiniBatchKMeans(
         n_clusters=n_clusters,
 
@@ -25,7 +25,6 @@
 print(torch.__version__)
 print(torchaudio.__version__)
 
-import librosa
 import matplotlib.pyplot as plt
 
 ######################################################################
@@ -75,7 +74,8 @@ def plot_spectrogram(specgram, title=None, ylabel="freq_bin", ax=None):
     if title is not None:
         ax.set_title(title)
     ax.set_ylabel(ylabel)
-    ax.imshow(librosa.power_to_db(specgram), origin="lower", aspect="auto", interpolation="nearest")
+    power_to_db = T.AmplitudeToDB("power", 80.0)
+    ax.imshow(power_to_db(specgram), origin="lower", aspect="auto", interpolation="nearest")
 
 
 def plot_fbank(fbank, title=None):
 
@@ -19,7 +19,7 @@
 # 1. Text preprocessing
 #
 #    First, the input text is encoded into a list of symbols. In this
-#    tutorial, we will use English characters and phonemes as the symbols.
+#    tutorial, we will use English characters as the symbols.
 #
 # 2. Spectrogram generation
 #
@@ -47,16 +47,6 @@
 # Preparation
 # -----------
 #
-# First, we install the necessary dependencies. In addition to
-# ``torchaudio``, ``DeepPhonemizer`` is required to perform phoneme-based
-# encoding.
-#
-
-# %%
-#  .. code-block:: bash
-#
-#      %%bash
-#      pip3 install deep_phonemizer
 
 import torch
 import torchaudio
@@ -140,49 +130,6 @@ def text_to_sequence(text):
 print([processor.tokens[i] for i in processed[0, : lengths[0]]])
 
 
-######################################################################
-# Phoneme-based encoding
-# ~~~~~~~~~~~~~~~~~~~~~~
-#
-# Phoneme-based encoding is similar to character-based encoding, but it
-# uses a symbol table based on phonemes and a G2P (Grapheme-to-Phoneme)
-# model.
-#
-# The detail of the G2P model is out of the scope of this tutorial, we will
-# just look at what the conversion looks like.
-#
-# Similar to the case of character-based encoding, the encoding process is
-# expected to match what a pretrained Tacotron2 model is trained on.
-# ``torchaudio`` has an interface to create the process.
-#
-# The following code illustrates how to make and use the process. Behind
-# the scene, a G2P model is created using ``DeepPhonemizer`` package, and
-# the pretrained weights published by the author of ``DeepPhonemizer`` is
-# fetched.
-#
-
-bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
-
-processor = bundle.get_text_processor()
-
-text = "Hello world! Text to speech!"
-with torch.inference_mode():
-    processed, lengths = processor(text)
-
-print(processed)
-print(lengths)
-
-
-######################################################################
-# Notice that the encoded values are different from the example of
-# character-based encoding.
-#
-# The intermediate representation looks like the following.
-#
-
-print([processor.tokens[i] for i in processed[0, : lengths[0]]])
-
-
 ######################################################################
 # Spectrogram Generation
 # ----------------------
@@ -202,7 +149,7 @@ def text_to_sequence(text):
 # :py:class:`~torchaudio.pipelines.Tacotron2TTSBundle`.
 #
 
-bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
+bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_CHAR_LJSPEECH
 processor = bundle.get_text_processor()
 tacotron2 = bundle.get_tacotron2().to(device)
 
@@ -256,7 +203,7 @@ def plot():
 # WaveRNN model from the same bundle.
 #
 
-bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
+bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_CHAR_LJSPEECH
 
 processor = bundle.get_text_processor()
 tacotron2 = bundle.get_tacotron2().to(device)
@@ -299,7 +246,7 @@ def plot(waveforms, spec, sample_rate):
 # method and pass the spectrogram.
 #
 
-bundle = torchaudio.pipelines.TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH
+bundle = torchaudio.pipelines.TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH
 
 processor = bundle.get_text_processor()
 tacotron2 = bundle.get_tacotron2().to(device)
Original file line number	Diff line number	Diff line change
`@@ -29,6 +29,10 @@ fi`
`29`	`29`	`)`
`30`	`30`
`31`	`31`	`(`
	`32`	`+ export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CTC_DECODER=true`
	`33`	`+ export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_unidecode=true`
	`34`	`+ export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_inflect=true`
	`35`	`+ export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_pytorch_lightning=true`
`32`	`36`	`cd test`
`33`		`- pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs and not (torchscript and rnnt) and not torchscript_consistency"`
	`37`	`+ pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not ffmpeg and not fairseq and not hdemucs and not (torchscript and rnnt) and not torchscript_consistency"`
`34`	`38`	`)`