
Commit ffcc42c

Merge branch 'master' into add-lbttspt

2 parents: ede4c30 + 5fa2995

File tree: 16 files changed (+147, -23 lines)


examples/fastspeech/conf/fastspeech.v1.yaml

Lines changed: 3 additions & 1 deletion

@@ -61,7 +61,9 @@ optimizer_params:
   warmup_proportion: 0.02
   weight_decay: 0.001

-
+var_train_expr: null # trainable variable expr (eg. 'embeddings|encoder|decoder' )
+                     # must separate by |. if var_train_expr is null then we
+                     # training all variable
 ###########################################################
 #                     INTERVAL SETTING                     #
 ###########################################################
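The comment on `var_train_expr` above says the expression is `|`-separated and that a null value leaves every variable trainable. Since `|` is also regex alternation, one plausible reading is a regex search over variable names; the sketch below illustrates that reading only and is not taken from the TensorFlowTTS trainer:

```python
import re

def select_trainable_variables(variables, var_train_expr):
    """Illustrative sketch of var_train_expr filtering (assumed semantics).

    An expression like 'embeddings|encoder|decoder' uses | as regex
    alternation, so a plain re.search keeps any variable whose name
    contains one of the tokens. A null (None) expression keeps every
    variable trainable, matching the config comment above.
    """
    if var_train_expr is None:
        return list(variables)
    pattern = re.compile(var_train_expr)
    return [v for v in variables if pattern.search(v.name)]
```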

examples/fastspeech/conf/fastspeech.v3.yaml

Lines changed: 3 additions & 1 deletion

@@ -61,7 +61,9 @@ optimizer_params:
   warmup_proportion: 0.02
   weight_decay: 0.001

-
+var_train_expr: null # trainable variable expr (eg. 'embeddings|encoder|decoder' )
+                     # must separate by |. if var_train_expr is null then we
+                     # training all variable
 ###########################################################
 #                     INTERVAL SETTING                     #
 ###########################################################

examples/fastspeech2/README.md

Lines changed: 2 additions & 0 deletions

@@ -36,6 +36,8 @@ If you want to finetune a model, use `--pretrained` like this with your model fi
   --pretrained pretrained.h5
 ```

+You can also define `var_train_expr` in the config file to train only certain layers, which is useful when fine-tuning on your own dataset with the same pretrained language and processor. For example, `var_train_expr: "embeddings|encoder|decoder"` trains only the variables whose names contain `embeddings`, `encoder`, or `decoder`.
+

 ### Step 3: Decode mel-spectrogram from folder ids
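To make the selection concrete, here is a self-contained sketch with a toy Keras model. The layer names are hypothetical, chosen only so the pattern matches some variables and not others; gradients are computed and applied only for the matched set, so everything else keeps its pretrained values:

```python
import re
import tensorflow as tf

# Toy stand-in for FastSpeech2; the layer names are hypothetical and
# chosen only so the pattern below matches some variables and not others.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation="relu", name="encoder_dense"),
    tf.keras.layers.Dense(16, activation="relu", name="decoder_dense"),
    tf.keras.layers.Dense(1, name="postnet_dense"),
])
model.build(input_shape=(None, 8))

# var_train_expr from the config; postnet_dense will not match.
pattern = re.compile("encoder|decoder")
train_vars = [v for v in model.trainable_variables if pattern.search(v.name)]

optimizer = tf.keras.optimizers.Adam(1e-4)
x = tf.random.normal([4, 8])
y = tf.random.normal([4, 1])

with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(x) - y))

# Gradients are taken and applied only for the matched variables, so the
# unmatched postnet_dense layer stays frozen at its pretrained values.
grads = tape.gradient(loss, train_vars)
optimizer.apply_gradients(zip(grads, train_vars))
```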

examples/fastspeech2/conf/fastspeech2.baker.v2.yaml

Lines changed: 3 additions & 1 deletion

@@ -63,7 +63,9 @@ optimizer_params:
   warmup_proportion: 0.02
   weight_decay: 0.001

-
+var_train_expr: null # trainable variable expr (eg. 'embeddings|encoder|decoder' )
+                     # must separate by |. if var_train_expr is null then we
+                     # training all variable
 ###########################################################
 #                     INTERVAL SETTING                     #
 ###########################################################

examples/fastspeech2/conf/fastspeech2.kss.v1.yaml

Lines changed: 3 additions & 1 deletion

@@ -62,7 +62,9 @@ optimizer_params:
   warmup_proportion: 0.02
   weight_decay: 0.001

-
+var_train_expr: null # trainable variable expr (eg. 'embeddings|encoder|decoder' )
+                     # must separate by |. if var_train_expr is null then we
+                     # training all variable
 ###########################################################
 #                     INTERVAL SETTING                     #
 ###########################################################

examples/fastspeech2/conf/fastspeech2.kss.v2.yaml

Lines changed: 3 additions & 1 deletion

@@ -63,7 +63,9 @@ optimizer_params:
   warmup_proportion: 0.02
   weight_decay: 0.001

-
+var_train_expr: null # trainable variable expr (eg. 'embeddings|encoder|decoder' )
+                     # must separate by |. if var_train_expr is null then we
+                     # training all variable
 ###########################################################
 #                     INTERVAL SETTING                     #
 ###########################################################

examples/fastspeech2/conf/fastspeech2.v1.yaml

Lines changed: 3 additions & 1 deletion

@@ -61,7 +61,9 @@ optimizer_params:
   warmup_proportion: 0.02
   weight_decay: 0.001

-
+var_train_expr: null # trainable variable expr (eg. 'embeddings|encoder|decoder' )
+                     # must separate by |. if var_train_expr is null then we
+                     # training all variable
 ###########################################################
 #                     INTERVAL SETTING                     #
 ###########################################################

examples/fastspeech2/conf/fastspeech2.v2.yaml

Lines changed: 3 additions & 1 deletion

@@ -62,7 +62,9 @@ optimizer_params:
   warmup_proportion: 0.02
   weight_decay: 0.001

-
+var_train_expr: null # trainable variable expr (eg. 'embeddings|encoder|decoder' )
+                     # must separate by |. if var_train_expr is null then we
+                     # training all variable
 ###########################################################
 #                     INTERVAL SETTING                     #
 ###########################################################

examples/fastspeech2_libritts/conf/fastspeech2libritts.yaml

Lines changed: 3 additions & 1 deletion

@@ -61,7 +61,9 @@ optimizer_params:
   warmup_proportion: 0.02
   weight_decay: 0.001

-
+var_train_expr: null # trainable variable expr (eg. 'embeddings|encoder|decoder' )
+                     # must separate by |. if var_train_expr is null then we
+                     # training all variable
 ###########################################################
 #                     INTERVAL SETTING                     #
 ###########################################################

examples/tacotron2/README.md

Lines changed: 1 addition & 1 deletion

@@ -87,9 +87,9 @@ tacotron2 = TFTacotron2(config=tacotron_config, training=True, name='tacotron2')
 tacotron2._build()
 tacotron2.summary()
 tacotron2.load_weights("./examples/tacotron2/exp/train.tacotron2.v1/checkpoints/model-120000.h5", by_name=True, skip_mismatch=True)
-
 ... # training as normal.
 ```
+You can also define `var_train_expr` in the config file to train only certain layers, which is useful when fine-tuning on your own dataset with the same pretrained language and processor. For example, `var_train_expr: "embeddings|encoder|decoder"` trains only the variables whose names contain `embeddings`, `encoder`, or `decoder`.

 ## Results
 Here is a result of tacotron2 based on this config [`tacotron2.v1.yaml`](https://github.com/dathudeptrai/TensorflowTTS/blob/tacotron-2-example/examples/tacotron-2/conf/tacotron2.v1.yaml) but with reduction_factor = 7; we will update learning curves for reduction_factor = 1.
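Before starting a fine-tuning run, it can be worth checking which variables a candidate expression would actually select. A small sketch, reusing the `tacotron2` object from the snippet above with a hypothetical pattern:

```python
import re

var_train_expr = "embeddings|encoder|decoder"  # hypothetical choice
pattern = re.compile(var_train_expr)

# Variables that would stay trainable under this expression.
selected = [v.name for v in tacotron2.trainable_variables
            if pattern.search(v.name)]
print(f"{len(selected)}/{len(tacotron2.trainable_variables)} variables selected")
for name in selected:
    print("  ", name)
```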
