Skip to content

Commit a62d0ed

Browse files
committed
🚀 Cleaner code, better README, add License.
1 parent 128475a commit a62d0ed

40 files changed

+494
-371
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,6 @@ ljspeech
3232
/datasets
3333
/examples/tacotron2/exp/
3434
/temp/
35+
LibriTTS/
36+
dataset/
37+
mfa/

examples/fastspeech2_multispeaker/README.md

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,56 @@
33
## Prepare
44
Everything is done from the main repo folder, i.e. TensorFlowTTS/
55

6-
0. Optional* Download and prepare libritts (helper to prepare libri in examples/fastspeech2_multispeaker/libri_experiment/prepare_libri.ipynb)
6+
0. Optional* [Download](http://www.openslr.org/60/) and prepare libritts (helper to prepare libri in examples/fastspeech2_multispeaker/libri_experiment/prepare_libri.ipynb)
7+
- Dataset structure after finish this step:
8+
```
9+
|- TensorFlowTTS/
10+
| |- LibriTTS/
11+
| |- |- train-clean-100/
12+
| |- |- SPEAKERS.txt
13+
| |- |- ...
14+
| |- dataset/
15+
| |- |- 200/
16+
| |- |- |- 200_124139_000001_000000.txt
17+
| |- |- |- 200_124139_000001_000000.wav
18+
| |- |- |- ...
19+
| |- |- 250/
20+
| |- |- ...
21+
| |- tensorflow_tts/
22+
| |- models/
23+
| |- ...
24+
```
725
1. Extract Duration (use examples/mfa_extraction or pretrained tacotron2)
8-
2. Optional* build docker `bash examples/fastspeech2_multispeaker/scripts/build.sh`
9-
3. Optional* run docker `bash examples/fastspeech2_multispeaker/scripts/interactive.sh`
10-
4. Run `tensorflow-tts-preprocess --rootdir ./dataset --outdir ./dump --config preprocess/preprocess_libritts.yaml --dataset multispeaker`
11-
5. Run `tensorflow-tts-normalize --rootdir ./dump --outdir ./dump --config preprocess/preprocess_libritts.yaml --dataset multispeaker`
26+
2. Optional* build docker
27+
- ```
28+
bash examples/fastspeech2_multispeaker/scripts/build.sh
29+
```
30+
3. Optional* run docker
31+
- ```
32+
bash examples/fastspeech2_multispeaker/scripts/interactive.sh
33+
```
34+
4. Preprocessing:
35+
- ```
36+
tensorflow-tts-preprocess --rootdir ./dataset \
37+
--outdir ./dump \
38+
--config preprocess/preprocess_libritts.yaml \
39+
--dataset multispeaker
40+
```
41+
42+
5. Normalization:
43+
- ```
44+
tensorflow-tts-normalize --rootdir ./dump \
45+
--outdir ./dump \
46+
--config preprocess/preprocess_libritts.yaml \
47+
--dataset multispeaker
48+
```
49+
1250
6. Change the CharactorDurationF0EnergyMelDataset speaker mapper in fastspeech2_dataset to match your dataset (if you use LibriTTS with mfa_extraction you don't need to change anything)
13-
7. Change train.sh to match your dataset and run `bash examples/fastspeech2_multispeaker/scripts/train.sh`
14-
or run libri `bash examples/fastspeech2_multispeaker/scripts/train_libri.sh`
15-
8. Optional* If u have problems with tensor sizes mismatch check examples/mfa_extraction directory
51+
7. Change train_libri.sh to match your dataset and run:
52+
- ```
53+
bash examples/fastspeech2_multispeaker/scripts/train_libri.sh
54+
```
55+
8. Optional* If you have problems with tensor size mismatches, check step 5 in the `examples/mfa_extraction` directory
1656

1757
## Comments
1858

examples/fastspeech2_multispeaker/conf/fastspeech2.v1.yaml

Lines changed: 0 additions & 74 deletions
This file was deleted.

examples/fastspeech2_multispeaker/conf/fastspeech2.v2.yaml

Lines changed: 0 additions & 78 deletions
This file was deleted.

examples/fastspeech2_multispeaker/conf/fastspeech2libritts.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# This is the hyperparameter configuration file for FastSpeech2 v1.
2-
# Please make sure this is adjusted for the LJSpeech dataset. If you want to
2+
# Please make sure this is adjusted for the LibriTTS dataset. If you want to
33
# apply to the other dataset, you might need to carefully change some parameters.
44
# This configuration performs 200k iters but a best checkpoint is around 150k iters.
55

@@ -9,10 +9,11 @@
99
hop_size: 256 # Hop size.
1010
format: "npy"
1111

12-
model_type: fastspeech2
1312
###########################################################
1413
# NETWORK ARCHITECTURE SETTING #
1514
###########################################################
15+
model_type: fastspeech2
16+
1617
fastspeech2_params:
1718
n_speakers: 20
1819
encoder_hidden_size: 384
@@ -70,5 +71,5 @@ log_interval_steps: 200 # Interval steps to record the training lo
7071
###########################################################
7172
# OTHER SETTING #
7273
###########################################################
73-
use_griffin: true
74+
use_griffin: true # Use GL on evaluation or not.
7475
num_save_intermediate_results: 1 # Number of batch to be saved as intermediate results.

examples/fastspeech2_multispeaker/fastspeech2_dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# -*- coding: utf-8 -*-
2-
# Copyright 2020 Minh Nguyen (@dathudeptrai)
2+
# Copyright 2020 TensorFlowTTS Team.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License");
55
# you may not use this file except in compliance with the License.

examples/fastspeech2_multispeaker/libri_experiment/prepare_libri.ipynb

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,18 @@
1010
"import random\n",
1111
"import shutil\n",
1212
"\n",
13-
"libri_path = \"LibriTTS\"\n",
14-
"dataset_path = \"TensorflowTTS/libritts\" # Change to your paths\n",
13+
"libri_path = \"...../TensorflowTTS/LibriTTS\" # absolute path to TensorFlowTTS.\n",
14+
"dataset_path = \"...../TensorflowTTS/dataset\" # Change to your paths\n",
1515
"subset = \"train-clean-100\""
1616
]
1717
},
1818
{
1919
"cell_type": "code",
20-
"execution_count": 3,
20+
"execution_count": 2,
2121
"metadata": {},
2222
"outputs": [],
2323
"source": [
24-
"with open(f\"{libri_path}/SPEAKERS.txt\") as f:\n",
24+
"with open(os.path.join(libri_path, \"SPEAKERS.txt\")) as f:\n",
2525
" data = f.readlines()\n",
2626
" \n",
2727
"dataset_info = {}\n",
@@ -33,7 +33,7 @@
3333
},
3434
{
3535
"cell_type": "code",
36-
"execution_count": 4,
36+
"execution_count": 3,
3737
"metadata": {},
3838
"outputs": [],
3939
"source": [
@@ -42,7 +42,7 @@
4242
},
4343
{
4444
"cell_type": "code",
45-
"execution_count": 5,
45+
"execution_count": 4,
4646
"metadata": {},
4747
"outputs": [],
4848
"source": [
@@ -51,7 +51,7 @@
5151
},
5252
{
5353
"cell_type": "code",
54-
"execution_count": 6,
54+
"execution_count": 5,
5555
"metadata": {},
5656
"outputs": [],
5757
"source": [
@@ -60,22 +60,22 @@
6060
},
6161
{
6262
"cell_type": "code",
63-
"execution_count": 1,
63+
"execution_count": 6,
6464
"metadata": {},
6565
"outputs": [],
6666
"source": [
6767
"possible_map = {}\n",
68-
"subset_path = f\"{libri_path}{subset}\"\n",
68+
"subset_path = os.path.join(libri_path, subset)\n",
6969
"for i in os.listdir(subset_path):\n",
7070
" if i in ids:\n",
71-
" id_path = f\"{subset_path}/{i}\"\n",
71+
" id_path = os.path.join(subset_path, i)\n",
7272
" id_dur = 0\n",
7373
" id_included = []\n",
7474
" \n",
7575
" for k in os.listdir(id_path):\n",
76-
" for j in os.listdir(f\"{id_path}/{k}\"):\n",
76+
" for j in os.listdir(os.path.join(id_path, k)):\n",
7777
" if \".wav\" in j:\n",
78-
" f_path = f\"{id_path}/{k}/{j}\"\n",
78+
" f_path = os.path.join(id_path, k, j)\n",
7979
" sf_file = sf.SoundFile(f_path)\n",
8080
" dur = len(sf_file) / sf_file.samplerate\n",
8181
" if max_file_len < dur < min_file_len:\n",
@@ -89,7 +89,7 @@
8989
},
9090
{
9191
"cell_type": "code",
92-
"execution_count": 8,
92+
"execution_count": 7,
9393
"metadata": {},
9494
"outputs": [],
9595
"source": [
@@ -98,7 +98,7 @@
9898
},
9999
{
100100
"cell_type": "code",
101-
"execution_count": null,
101+
"execution_count": 8,
102102
"metadata": {},
103103
"outputs": [],
104104
"source": [
@@ -109,7 +109,7 @@
109109
},
110110
{
111111
"cell_type": "code",
112-
"execution_count": null,
112+
"execution_count": 9,
113113
"metadata": {},
114114
"outputs": [],
115115
"source": [
@@ -118,9 +118,9 @@
118118
" for j in v:\n",
119119
" f_name = j.split(\"/\")[-1]\n",
120120
" text_f_name = f_name.split(\".wav\")[0] + \".txt\"\n",
121-
" os.makedirs(f\"{dataset_path}/{sp_id}\", exist_ok=True)\n",
122-
" shutil.copy(j, f\"{dataset_path}/{sp_id}/{f_name}\")\n",
123-
" shutil.copy(j.replace(\".wav\", \".normalized.txt\"), f\"{dataset_path}/{sp_id}/{text_f_name}\")"
121+
" os.makedirs(os.path.join(dataset_path, sp_id), exist_ok=True)\n",
122+
" shutil.copy(j, os.path.join(dataset_path, sp_id, f_name))\n",
123+
" shutil.copy(j.replace(\".wav\", \".normalized.txt\"), os.path.join(dataset_path, sp_id, text_f_name))"
124124
]
125125
}
126126
],
@@ -140,9 +140,9 @@
140140
"name": "python",
141141
"nbconvert_exporter": "python",
142142
"pygments_lexer": "ipython3",
143-
"version": "3.8.3"
143+
"version": "3.7.7"
144144
}
145145
},
146146
"nbformat": 4,
147147
"nbformat_minor": 4
148-
}
148+
}

0 commit comments

Comments
 (0)