Skip to content

Commit a62d0ed

Browse files
committed
🚀 Cleaner code, better README, add License.
1 parent 128475a commit a62d0ed

40 files changed

+494
-371
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,6 @@ ljspeech
3232
/datasets
3333
/examples/tacotron2/exp/
3434
/temp/
35+
LibriTTS/
36+
dataset/
37+
mfa/

examples/fastspeech2_multispeaker/README.md

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,56 @@
33
## Prepare
44
Everything is done from the main repo folder, i.e. TensorFlowTTS/
55

6-
0. Optional* Download and prepare libritts (helper to prepare libri in examples/fastspeech2_multispeaker/libri_experiment/prepare_libri.ipynb)
6+
0. Optional* [Download](http://www.openslr.org/60/) and prepare libritts (helper to prepare libri in examples/fastspeech2_multispeaker/libri_experiment/prepare_libri.ipynb)
7+
- Dataset structure after finish this step:
8+
```
9+
|- TensorFlowTTS/
10+
| |- LibriTTS/
11+
| |- |- train-clean-100/
12+
| |- |- SPEAKERS.txt
13+
| |- |- ...
14+
| |- dataset/
15+
| |- |- 200/
16+
| |- |- |- 200_124139_000001_000000.txt
17+
| |- |- |- 200_124139_000001_000000.wav
18+
| |- |- |- ...
19+
| |- |- 250/
20+
| |- |- ...
21+
| |- tensorflow_tts/
22+
| |- models/
23+
| |- ...
24+
```
725
1. Extract Duration (use examples/mfa_extraction or pretrained tacotron2)
8-
2. Optional* build docker `bash examples/fastspeech2_multispeaker/scripts/build.sh`
9-
3. Optional* run docker `bash examples/fastspeech2_multispeaker/scripts/interactive.sh`
10-
4. Run `tensorflow-tts-preprocess --rootdir ./dataset --outdir ./dump --config preprocess/preprocess_libritts.yaml --dataset multispeaker`
11-
5. Run `tensorflow-tts-normalize --rootdir ./dump --outdir ./dump --config preprocess/preprocess_libritts.yaml --dataset multispeaker`
26+
2. Optional* build docker
27+
- ```
28+
bash examples/fastspeech2_multispeaker/scripts/build.sh
29+
```
30+
3. Optional* run docker
31+
- ```
32+
bash examples/fastspeech2_multispeaker/scripts/interactive.sh
33+
```
34+
4. Preprocessing:
35+
- ```
36+
tensorflow-tts-preprocess --rootdir ./dataset \
37+
--outdir ./dump \
38+
--config preprocess/preprocess_libritts.yaml \
39+
--dataset multispeaker
40+
```
41+
42+
5. Normalization:
43+
- ```
44+
tensorflow-tts-normalize --rootdir ./dump \
45+
--outdir ./dump \
46+
--config preprocess/preprocess_libritts.yaml \
47+
--dataset multispeaker
48+
```
49+
1250
6. Change the CharactorDurationF0EnergyMelDataset speaker mapper in fastspeech2_dataset to match your dataset (if you use LibriTTS with mfa_extraction you don't need to change anything)
13-
7. Change train.sh to match your dataset and run `bash examples/fastspeech2_multispeaker/scripts/train.sh`
14-
or run libri `bash examples/fastspeech2_multispeaker/scripts/train_libri.sh`
15-
8. Optional* If u have problems with tensor sizes mismatch check examples/mfa_extraction directory
51+
7. Change train_libri.sh to match your dataset and run:
52+
- ```
53+
bash examples/fastspeech2_multispeaker/scripts/train_libri.sh
54+
```
55+
8. Optional* If you have problems with tensor size mismatches, check step 5 in the `examples/mfa_extraction` directory
1656

1757
## Comments
1858

examples/fastspeech2_multispeaker/conf/fastspeech2.v1.yaml

Lines changed: 0 additions & 74 deletions
This file was deleted.

examples/fastspeech2_multispeaker/conf/fastspeech2.v2.yaml

Lines changed: 0 additions & 78 deletions
This file was deleted.

examples/fastspeech2_multispeaker/conf/fastspeech2libritts.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# This is the hyperparameter configuration file for FastSpeech2 v1.
2-
# Please make sure this is adjusted for the LJSpeech dataset. If you want to
2+
# Please make sure this is adjusted for the LibriTTS dataset. If you want to
33
# apply to the other dataset, you might need to carefully change some parameters.
44
# This configuration performs 200k iters but a best checkpoint is around 150k iters.
55

@@ -9,10 +9,11 @@
99
hop_size: 256 # Hop size.
1010
format: "npy"
1111

12-
model_type: fastspeech2
1312
###########################################################
1413
# NETWORK ARCHITECTURE SETTING #
1514
###########################################################
15+
model_type: fastspeech2
16+
1617
fastspeech2_params:
1718
n_speakers: 20
1819
encoder_hidden_size: 384
@@ -70,5 +71,5 @@ log_interval_steps: 200 # Interval steps to record the training lo
7071
###########################################################
7172
# OTHER SETTING #
7273
###########################################################
73-
use_griffin: true
74+
use_griffin: true # Use GL on evaluation or not.
7475
num_save_intermediate_results: 1 # Number of batch to be saved as intermediate results.

examples/fastspeech2_multispeaker/fastspeech2_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# -*- coding: utf-8 -*-
2-
# Copyright 2020 Minh Nguyen (@dathudeptrai)
2+
# Copyright 2020 TensorFlowTTS Team.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.

examples/fastspeech2_multispeaker/libri_experiment/prepare_libri.ipynb

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,18 @@
1010
"import random\n",
1111
"import shutil\n",
1212
"\n",
13-
"libri_path = \"LibriTTS\"\n",
14-
"dataset_path = \"TensorflowTTS/libritts\" # Change to your paths\n",
13+
"libri_path = \"...../TensorflowTTS/LibriTTS\" # absolute path to TensorFlowTTS.\n",
14+
"dataset_path = \"...../TensorflowTTS/dataset\" # Change to your paths\n",
1515
"subset = \"train-clean-100\""
1616
]
1717
},
1818
{
1919
"cell_type": "code",
20-
"execution_count": 3,
20+
"execution_count": 2,
2121
"metadata": {},
2222
"outputs": [],
2323
"source": [
24-
"with open(f\"{libri_path}/SPEAKERS.txt\") as f:\n",
24+
"with open(os.path.join(libri_path, \"SPEAKERS.txt\")) as f:\n",
2525
" data = f.readlines()\n",
2626
" \n",
2727
"dataset_info = {}\n",
@@ -33,7 +33,7 @@
3333
},
3434
{
3535
"cell_type": "code",
36-
"execution_count": 4,
36+
"execution_count": 3,
3737
"metadata": {},
3838
"outputs": [],
3939
"source": [
@@ -42,7 +42,7 @@
4242
},
4343
{
4444
"cell_type": "code",
45-
"execution_count": 5,
45+
"execution_count": 4,
4646
"metadata": {},
4747
"outputs": [],
4848
"source": [
@@ -51,7 +51,7 @@
5151
},
5252
{
5353
"cell_type": "code",
54-
"execution_count": 6,
54+
"execution_count": 5,
5555
"metadata": {},
5656
"outputs": [],
5757
"source": [
@@ -60,22 +60,22 @@
6060
},
6161
{
6262
"cell_type": "code",
63-
"execution_count": 1,
63+
"execution_count": 6,
6464
"metadata": {},
6565
"outputs": [],
6666
"source": [
6767
"possible_map = {}\n",
68-
"subset_path = f\"{libri_path}{subset}\"\n",
68+
"subset_path = os.path.join(libri_path, subset)\n",
6969
"for i in os.listdir(subset_path):\n",
7070
" if i in ids:\n",
71-
" id_path = f\"{subset_path}/{i}\"\n",
71+
" id_path = os.path.join(subset_path, i)\n",
7272
" id_dur = 0\n",
7373
" id_included = []\n",
7474
" \n",
7575
" for k in os.listdir(id_path):\n",
76-
" for j in os.listdir(f\"{id_path}/{k}\"):\n",
76+
" for j in os.listdir(os.path.join(id_path, k)):\n",
7777
" if \".wav\" in j:\n",
78-
" f_path = f\"{id_path}/{k}/{j}\"\n",
78+
" f_path = os.path.join(id_path, k, j)\n",
7979
" sf_file = sf.SoundFile(f_path)\n",
8080
" dur = len(sf_file) / sf_file.samplerate\n",
8181
" if max_file_len < dur < min_file_len:\n",
@@ -89,7 +89,7 @@
8989
},
9090
{
9191
"cell_type": "code",
92-
"execution_count": 8,
92+
"execution_count": 7,
9393
"metadata": {},
9494
"outputs": [],
9595
"source": [
@@ -98,7 +98,7 @@
9898
},
9999
{
100100
"cell_type": "code",
101-
"execution_count": null,
101+
"execution_count": 8,
102102
"metadata": {},
103103
"outputs": [],
104104
"source": [
@@ -109,7 +109,7 @@
109109
},
110110
{
111111
"cell_type": "code",
112-
"execution_count": null,
112+
"execution_count": 9,
113113
"metadata": {},
114114
"outputs": [],
115115
"source": [
@@ -118,9 +118,9 @@
118118
" for j in v:\n",
119119
" f_name = j.split(\"/\")[-1]\n",
120120
" text_f_name = f_name.split(\".wav\")[0] + \".txt\"\n",
121-
" os.makedirs(f\"{dataset_path}/{sp_id}\", exist_ok=True)\n",
122-
" shutil.copy(j, f\"{dataset_path}/{sp_id}/{f_name}\")\n",
123-
" shutil.copy(j.replace(\".wav\", \".normalized.txt\"), f\"{dataset_path}/{sp_id}/{text_f_name}\")"
121+
" os.makedirs(os.path.join(dataset_path, sp_id), exist_ok=True)\n",
122+
" shutil.copy(j, os.path.join(dataset_path, sp_id, f_name))\n",
123+
" shutil.copy(j.replace(\".wav\", \".normalized.txt\"), os.path.join(dataset_path, sp_id, text_f_name))"
124124
]
125125
}
126126
],
@@ -140,9 +140,9 @@
140140
"name": "python",
141141
"nbconvert_exporter": "python",
142142
"pygments_lexer": "ipython3",
143-
"version": "3.8.3"
143+
"version": "3.7.7"
144144
}
145145
},
146146
"nbformat": 4,
147147
"nbformat_minor": 4
148-
}
148+
}

0 commit comments

Comments
 (0)