Update configs, use RectifiedAdam for PWGAN discriminator like in native config.

ZDisket · ZDisket · commit 1bf59a1ac679 · 2020-08-19T15:49:07.000-03:00
diff --git a/examples/multiband_pwgan/README.md b/examples/multiband_pwgan/README.md
@@ -61,11 +61,21 @@ CUDA_VISIBLE_DEVICES=0 python examples/multiband_pwgan/decode_mb_melgan.py \
   --use-norm 1
 ```
 
-## Finetune MelGAN STFT with ljspeech pretrained on other languages
-Just load pretrained model and training from scratch with other languages. **DO NOT FORGET** re-preprocessing on your dataset if needed. A hop_size should be 256 if you want to use our pretrained.
+## Finetune Multi-Band MelGAN + PWGAN Disc with ljspeech pretrained on other languages
+Download the pretrained generator weights (saved here as `ptgen.h5`) and pass them to the training script via `--pretrained`:
 
+```bash
+CUDA_VISIBLE_DEVICES=0 python examples/multiband_pwgan/train_multiband_pwgan.py \
+  --train-dir ./dump/train/ \
+  --dev-dir ./dump/valid/ \
+  --outdir ./examples/multiband_pwgan/exp/train.multiband_melgan.v1/ \
+  --config ./examples/multiband_pwgan/conf/multiband_pwgan.v1.yaml \
+  --use-norm 1 \
+  --generator_mixed_precision 1 \
+  --pretrained "ptgen.h5"
+```
 ## Learning Curves
-Here is a learning curves of melgan based on this config [`multiband_pwgan.v1.yaml`](https://github.com/dathudeptrai/TensorflowTTS/tree/master/examples/multiband_pwgan/conf/multiband_pwgan.v1.yaml)
+Here are the learning curves of Multi-Band MelGAN with the PWGAN discriminator, based on this config: [`multiband_pwgan.v1.yaml`](https://github.com/dathudeptrai/TensorflowTTS/tree/master/examples/multiband_pwgan/conf/multiband_pwgan.v1.yaml)
 
 <img src="fig/eval.png" height="300" width="850">
 
@@ -77,6 +87,9 @@ Here is a learning curves of melgan based on this config [`multiband_pwgan.v1.ya
 | [multiband_melgan.v1](https://drive.google.com/drive/folders/1Hg82YnPbX6dfF7DxVs4c96RBaiFbh-cT?usp=sharing)             | [link](https://github.com/tensorspeech/TensorFlowTTS/tree/master/examples/multiband_pwgan/conf/multiband_pwgan.v1.yaml)          | EN    | 22.05k  | 80-7600        | 1024 / 256 / None    | 940K    |
 | [multiband_melgan.v1](https://drive.google.com/drive/folders/199XCXER51PWf_VzUpOwxfY_8XDfeXuZl?usp=sharing)             | [link](https://github.com/dathudeptrai/TensorflowTTS/tree/master/examples/multiband_pwgan/conf/multiband_pwgan.v1.yaml)          | KO    | 22.05k  | 80-7600        | 1024 / 256 / None    | 1000K    |
 
+## Notes
+1. The discriminator is trained with the RAdam (Rectified Adam) optimizer, as in the native Parallel WaveGAN config.
+
 ## Reference
 
 1. https://github.com/kan-bayashi/ParallelWaveGAN
diff --git a/examples/multiband_pwgan/conf/multiband_pwgan.v1.yaml b/examples/multiband_pwgan/conf/multiband_pwgan.v1.yaml
@@ -79,11 +79,12 @@ generator_optimizer_params:
     amsgrad: false
 
 discriminator_optimizer_params:
-    lr_fn: "PiecewiseConstantDecay"
+    lr_fn: "ExponentialDecay"
     lr_params: 
-        boundaries: [100000, 200000, 300000, 400000, 500000]
-        values: [0.00025, 0.000125, 0.0000625, 0.00003125, 0.000015625, 0.000001]
-    amsgrad: false
+        initial_learning_rate: 0.0005
+        decay_steps: 200000
+        decay_rate: 0.5
+
 
 ###########################################################
 #                    INTERVAL SETTING                     #
diff --git a/examples/multiband_pwgan/conf/multiband_pwgan.v1ft.yaml b/examples/multiband_pwgan/conf/multiband_pwgan.v1ft.yaml
@@ -1,6 +1,6 @@
 
 # This is the hyperparameter configuration file for Multi-Band MelGAN with PWGAN discriminator.
-# This one is adjusted for finetuning
+# This one is adjusted for finetuning: it is used to finetune the LJSpeech pretrained model on other languages.
 
 ###########################################################
 #                FEATURE EXTRACTION SETTING               #
@@ -72,25 +72,26 @@ is_shuffle: true               # shuffle dataset after each epoch.
 generator_optimizer_params:
     lr_fn: "PiecewiseConstantDecay"
     lr_params: 
-        boundaries: [100000, 200000, 300000, 400000, 500000, 600000, 700000]
-        values: [0.0005, 0.0005, 0.00025, 0.000125, 0.0000625, 0.00003125, 0.000015625, 0.000001]
+        boundaries: [1000, 5000, 10000, 20000]
+        values: [0.00000000001, 0.000000000005, 0.000000000002, 0.0000000000005, 0.0000000000002]
     amsgrad: false
 
+
 discriminator_optimizer_params:
-    lr_fn: "PiecewiseConstantDecay"
+    lr_fn: "ExponentialDecay"
     lr_params: 
-        boundaries: [100000, 200000, 300000, 400000, 500000]
-        values: [0.00025, 0.000125, 0.0000625, 0.00003125, 0.000015625, 0.000001]
-    amsgrad: false
+        initial_learning_rate: 0.0000000005
+        decay_steps: 70000
+        decay_rate: 0.5
 
 ###########################################################
 #                    INTERVAL SETTING                     #
 ###########################################################
 discriminator_train_start_steps: 0  # steps begin training discriminator
-train_max_steps: 200000                 # Number of training steps.
-save_interval_steps: 5000               # Interval steps to save checkpoint.
-eval_interval_steps: 1000                # Interval steps to evaluate the network.
-log_interval_steps: 200                  # Interval steps to record the training log.
+train_max_steps: 10000                 # Number of training steps.
+save_interval_steps: 1500               # Interval steps to save checkpoint.
+eval_interval_steps: 500                # Interval steps to evaluate the network.
+log_interval_steps: 100                  # Interval steps to record the training log.
 
 ###########################################################
 #                     OTHER SETTING                       #