Skip to content

Commit c4c2f78

Browse files
authored
Merge branch 'main' into export-D79451615
2 parents 02d6815 + 02351a6 commit c4c2f78

File tree

752 files changed

+251
-368
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

752 files changed

+251
-368
lines changed

.github/scripts/unittest-linux/install.sh

Lines changed: 2 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -24,41 +24,7 @@ esac
2424
conda create -n ci -y python="${PYTHON_VERSION}"
2525
conda activate ci
2626

27-
# 1. Install PyTorch
28-
# if [ -z "${CUDA_VERSION:-}" ] ; then
29-
# if [ "${os}" == MacOSX ] ; then
30-
# cudatoolkit=''
31-
# else
32-
# cudatoolkit="cpuonly"
33-
# fi
34-
# version="cpu"
35-
# else
36-
# version="$(python -c "print('.'.join(\"${CUDA_VERSION}\".split('.')[:2]))")"
37-
# export CUDATOOLKIT_CHANNEL="nvidia"
38-
# cudatoolkit="pytorch-cuda=${version}"
39-
# fi
40-
41-
# printf "Installing PyTorch with %s\n" "${cudatoolkit}"
42-
# (
43-
# if [ "${os}" == MacOSX ] ; then
44-
# # TODO: this can be removed as soon as linking issue could be resolved
45-
# # see https://github.com/pytorch/pytorch/issues/62424 from details
46-
# MKL_CONSTRAINT='mkl==2021.2.0'
47-
# pytorch_build=pytorch
48-
# else
49-
# MKL_CONSTRAINT=''
50-
# pytorch_build="pytorch[build="*${version}*"]"
51-
# fi
52-
# set -x
53-
54-
# if [[ -z "$cudatoolkit" ]]; then
55-
# conda install ${CONDA_CHANNEL_FLAGS:-} -y -c "pytorch-${UPLOAD_CHANNEL}" $MKL_CONSTRAINT "pytorch-${UPLOAD_CHANNEL}::${pytorch_build}"
56-
# else
57-
# conda install pytorch ${cudatoolkit} ${CONDA_CHANNEL_FLAGS:-} -y -c "pytorch-${UPLOAD_CHANNEL}" -c nvidia $MKL_CONSTRAINT
58-
# fi
59-
# )
60-
61-
export GPU_ARCH_TYPE="cpu" # TODO change this
27+
export GPU_ARCH_TYPE="cpu"
6228

6329
case $GPU_ARCH_TYPE in
6430
cpu)
@@ -90,22 +56,4 @@ printf "* Installing test tools\n"
9056
conda install -y "ffmpeg<5"
9157
python -c "import torch; import torchaudio; import torchcodec; print(torch.__version__, torchaudio.__version__, torchcodec.__version__)"
9258

93-
NUMBA_DEV_CHANNEL=""
94-
if [[ "$(python --version)" = *3.9* || "$(python --version)" = *3.10* ]]; then
95-
# Numba isn't available for Python 3.9 and 3.10 except on the numba dev channel and building from source fails
96-
# See https://github.com/librosa/librosa/issues/1270#issuecomment-759065048
97-
NUMBA_DEV_CHANNEL="-c numba/label/dev"
98-
fi
99-
(
100-
set -x
101-
conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} sox libvorbis parameterized 'requests>=2.20'
102-
pip install kaldi-io SoundFile librosa coverage pytest pytest-cov scipy expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag pyroomacoustics flashlight-text git+https://github.com/kpu/kenlm
103-
104-
# TODO: might be better to fix the single call to `pip install` above
105-
pip install "pillow<10.0" "scipy<1.10" "numpy<2.0"
106-
)
107-
# Install fairseq
108-
git clone https://github.com/pytorch/fairseq
109-
cd fairseq
110-
git checkout e47a4c8
111-
pip install .
59+
pip3 install parameterized requests coverage pytest pytest-cov scipy numpy expecttest

.github/scripts/unittest-linux/run_test.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ fi
2929
)
3030

3131
(
32+
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CTC_DECODER=true
33+
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_unidecode=true
34+
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_inflect=true
35+
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_pytorch_lightning=true
3236
cd test
33-
pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not sox and not ffmpeg and not fairseq and not hdemucs and not (torchscript and rnnt) and not torchscript_consistency"
37+
pytest torchaudio_unittest -k "not backend and not /io/ and not prototype and not ffmpeg and not fairseq and not hdemucs and not (torchscript and rnnt) and not torchscript_consistency"
3438
)

.github/scripts/unittest-windows/environment.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ dependencies:
88
- scipy >= 1.4.1
99
- pip
1010
- pip:
11-
- kaldi-io
1211
- PySoundFile
1312
- future
1413
- parameterized

.github/scripts/unittest-windows/install.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,8 @@ case "$(python --version)" in
6464
NUMBA_DEV_CHANNEL="-c numba/label/dev"
6565
;;
6666
esac
67-
# Note: installing librosa via pip fail because it will try to compile numba.
6867
(
69-
conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} 'librosa==0.10.0' parameterized 'requests>=2.20'
68+
conda install -y -c conda-forge ${NUMBA_DEV_CHANNEL} parameterized 'requests>=2.20'
7069
# Need to disable shell check since this'll fail out if SENTENCEPIECE_DEPENDENCY is empty
7170
# shellcheck disable=SC2086
7271
pip install \
@@ -76,7 +75,6 @@ esac
7675
coverage \
7776
expecttest \
7877
inflect \
79-
kaldi-io \
8078
pytest \
8179
pytest-cov \
8280
pytorch-lightning \

.github/workflows/unittest-linux-gpu.yml

Lines changed: 2 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,13 @@ jobs:
3434
export PYTHON_VERSION="${{ matrix.python_version }}"
3535
export CU_VERSION="${{ matrix.cuda_arch_version }}"
3636
export CUDATOOLKIT="pytorch-cuda=${CU_VERSION}"
37-
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_APPLY_CMVN_SLIDING=true
38-
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_FBANK_FEATS=true
39-
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_KALDI_PITCH_FEATS=true
40-
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_MFCC_FEATS=true
41-
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS=true
4237
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY=true
4338
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310=true
4439
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_TEMPORARY_DISABLED=true
4540
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX_DECODER=true
4641
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX_ENCODER=true
4742
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_FFMPEG=true
43+
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_demucs=true
4844
# Avoid reproducibility errors with CUBLAS: https://docs.nvidia.com/cuda/cublas/index.html#results-reproducibility
4945
export CUBLAS_WORKSPACE_CONFIG=:4096:8
5046
@@ -78,35 +74,8 @@ jobs:
7874
7975
echo "::endgroup::"
8076
echo "::group::Install other Dependencies"
81-
# conda install \
82-
# --quiet --yes \
83-
# -c conda-forge \
84-
# -c numba/label/dev \
85-
# sox libvorbis 'librosa==0.10.0' parameterized 'requests>=2.20'
86-
# pip3 install --progress-bar off \
87-
# kaldi-io \
88-
# SoundFile \
89-
# coverage \
90-
# pytest \
91-
# pytest-cov \
92-
# scipy \
93-
# transformers \
94-
# expecttest \
95-
# unidecode \
96-
# inflect \
97-
# Pillow \
98-
# sentencepiece \
99-
# pytorch-lightning \
100-
# 'protobuf<4.21.0' \
101-
# demucs \
102-
# tinytag \
103-
# flashlight-text \
104-
# git+https://github.com/kpu/kenlm/ \
105-
# git+https://github.com/pytorch/fairseq.git@e47a4c8
10677
107-
pip3 install parameterized requests
108-
pip3 install kaldi-io SoundFile librosa coverage pytest pytest-cov scipy expecttest unidecode inflect Pillow sentencepiece pytorch-lightning 'protobuf<4.21.0' demucs tinytag
109-
pip3 install "pillow<10.0" "scipy<1.10" "numpy<2.0"
78+
pip3 install parameterized requests coverage pytest pytest-cov scipy numpy expecttest
11079
11180
echo "::endgroup::"
11281
echo "::group::Run tests"

docs/requirements-tutorials.txt

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1 @@
11
IPython
2-
deep-phonemizer
3-
boto3
4-
cython
5-
pandas
6-
librosa==0.10.0
7-
sentencepiece
8-
pandoc
9-
mir_eval
10-
pesq
11-
pystoi

docs/source/conf.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,9 +121,13 @@ def _get_pattern():
121121
}
122122

123123
ret = {"filename_pattern": "tutorial.py"}
124+
no_build = r"/examples/tutorials/asr_inference_with_cuda_ctc_decoder_tutorial.py"
124125
if os.getenv("GALLERY_PATTERN"):
125126
# See https://github.com/pytorch/tutorials/blob/cbf2238df0e78d84c15bd94288966d2f4b2e83ae/conf.py#L75-L83
126-
ret["ignore_pattern"] = r"/(?!" + re.escape(os.getenv("GALLERY_PATTERN")) + r")[^/]+$"
127+
ret["ignore_pattern"] = r"(/(?!" + re.escape(os.getenv("GALLERY_PATTERN")) + r")[^/]+$)"
128+
ret["ignore_pattern"] += "|(" + no_build + ")"
129+
else:
130+
ret["ignore_pattern"] = no_build
127131
return ret
128132

129133

examples/hubert/utils/kmeans.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from typing import Tuple
99

1010
import torch
11-
from sklearn.cluster import MiniBatchKMeans
1211
from torch import Tensor
1312

1413
from .common_utils import _get_feat_lens_paths, _get_model_path
@@ -102,6 +101,7 @@ def learn_kmeans(
102101
"""
103102
if not km_dir.exists():
104103
km_dir.mkdir()
104+
from sklearn.cluster import MiniBatchKMeans
105105

106106
km_model = MiniBatchKMeans(
107107
n_clusters=n_clusters,

examples/tutorials/audio_feature_extractions_tutorial.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
print(torch.__version__)
2626
print(torchaudio.__version__)
2727

28-
import librosa
2928
import matplotlib.pyplot as plt
3029

3130
######################################################################
@@ -75,7 +74,8 @@ def plot_spectrogram(specgram, title=None, ylabel="freq_bin", ax=None):
7574
if title is not None:
7675
ax.set_title(title)
7776
ax.set_ylabel(ylabel)
78-
ax.imshow(librosa.power_to_db(specgram), origin="lower", aspect="auto", interpolation="nearest")
77+
power_to_db = T.AmplitudeToDB("power", 80.0)
78+
ax.imshow(power_to_db(specgram), origin="lower", aspect="auto", interpolation="nearest")
7979

8080

8181
def plot_fbank(fbank, title=None):

examples/tutorials/tacotron2_pipeline_tutorial.py

Lines changed: 4 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
# 1. Text preprocessing
2020
#
2121
# First, the input text is encoded into a list of symbols. In this
22-
# tutorial, we will use English characters and phonemes as the symbols.
22+
# tutorial, we will use English characters as the symbols.
2323
#
2424
# 2. Spectrogram generation
2525
#
@@ -47,16 +47,6 @@
4747
# Preparation
4848
# -----------
4949
#
50-
# First, we install the necessary dependencies. In addition to
51-
# ``torchaudio``, ``DeepPhonemizer`` is required to perform phoneme-based
52-
# encoding.
53-
#
54-
55-
# %%
56-
# .. code-block:: bash
57-
#
58-
# %%bash
59-
# pip3 install deep_phonemizer
6050

6151
import torch
6252
import torchaudio
@@ -140,49 +130,6 @@ def text_to_sequence(text):
140130
print([processor.tokens[i] for i in processed[0, : lengths[0]]])
141131

142132

143-
######################################################################
144-
# Phoneme-based encoding
145-
# ~~~~~~~~~~~~~~~~~~~~~~
146-
#
147-
# Phoneme-based encoding is similar to character-based encoding, but it
148-
# uses a symbol table based on phonemes and a G2P (Grapheme-to-Phoneme)
149-
# model.
150-
#
151-
# The detail of the G2P model is out of the scope of this tutorial, we will
152-
# just look at what the conversion looks like.
153-
#
154-
# Similar to the case of character-based encoding, the encoding process is
155-
# expected to match what a pretrained Tacotron2 model is trained on.
156-
# ``torchaudio`` has an interface to create the process.
157-
#
158-
# The following code illustrates how to make and use the process. Behind
159-
# the scene, a G2P model is created using ``DeepPhonemizer`` package, and
160-
# the pretrained weights published by the author of ``DeepPhonemizer`` is
161-
# fetched.
162-
#
163-
164-
bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
165-
166-
processor = bundle.get_text_processor()
167-
168-
text = "Hello world! Text to speech!"
169-
with torch.inference_mode():
170-
processed, lengths = processor(text)
171-
172-
print(processed)
173-
print(lengths)
174-
175-
176-
######################################################################
177-
# Notice that the encoded values are different from the example of
178-
# character-based encoding.
179-
#
180-
# The intermediate representation looks like the following.
181-
#
182-
183-
print([processor.tokens[i] for i in processed[0, : lengths[0]]])
184-
185-
186133
######################################################################
187134
# Spectrogram Generation
188135
# ----------------------
@@ -202,7 +149,7 @@ def text_to_sequence(text):
202149
# :py:class:`~torchaudio.pipelines.Tacotron2TTSBundle`.
203150
#
204151

205-
bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
152+
bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_CHAR_LJSPEECH
206153
processor = bundle.get_text_processor()
207154
tacotron2 = bundle.get_tacotron2().to(device)
208155

@@ -256,7 +203,7 @@ def plot():
256203
# WaveRNN model from the same bundle.
257204
#
258205

259-
bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
206+
bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_CHAR_LJSPEECH
260207

261208
processor = bundle.get_text_processor()
262209
tacotron2 = bundle.get_tacotron2().to(device)
@@ -299,7 +246,7 @@ def plot(waveforms, spec, sample_rate):
299246
# method and pass the spectrogram.
300247
#
301248

302-
bundle = torchaudio.pipelines.TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH
249+
bundle = torchaudio.pipelines.TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH
303250

304251
processor = bundle.get_text_processor()
305252
tacotron2 = bundle.get_tacotron2().to(device)

0 commit comments

Comments
 (0)