✍️ test and update train script

nglehuy · nglehuy · commit 4be4a6e062c7 · 2021-04-17T18:07:08.000+07:00
diff --git a/examples/conformer/config.yml b/examples/conformer/config.yml
@@ -115,7 +115,7 @@ learning_config:
     checkpoint:
       filepath: /mnt/e/Models/local/conformer/checkpoints/{epoch:02d}.h5
       save_best_only: True
-      save_weights_only: False
+      save_weights_only: True
       save_freq: epoch
     states_dir: /mnt/e/Models/local/conformer/states
     tensorboard:
diff --git a/examples/contextnet/config.yml b/examples/contextnet/config.yml
@@ -228,7 +228,7 @@ learning_config:
   test_dataset_config:
     use_tf: True
     data_paths:
-      - /mnt/e/Datasets/Speech/LibriSpeech/test-clean/transcripts.tsv
+      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/test-clean/transcripts.tsv
     tfrecords_dir: null
     shuffle: False
     cache: True
@@ -248,7 +248,7 @@ learning_config:
     checkpoint:
       filepath: /mnt/e/Models/local/contextnet/checkpoints/{epoch:02d}.h5
       save_best_only: True
-      save_weights_only: False
+      save_weights_only: True
       save_freq: epoch
     states_dir: /mnt/e/Models/local/contextnet/states
     tensorboard:
diff --git a/examples/deepspeech2/config.yml b/examples/deepspeech2/config.yml
@@ -52,8 +52,8 @@ learning_config:
   train_dataset_config:
     use_tf: True
     data_paths:
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/train-clean-100/transcripts.tsv
-    tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
+      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/train-clean-100/transcripts.tsv
+    tfrecords_dir: null
     shuffle: True
     cache: True
     buffer_size: 100
@@ -62,10 +62,8 @@ learning_config:
 
   eval_dataset_config:
     use_tf: True
-    data_paths:
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-clean/transcripts.tsv
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-other/transcripts.tsv
-    tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
+    data_paths: null
+    tfrecords_dir: null
     shuffle: False
     cache: True
     buffer_size: 100
@@ -75,8 +73,8 @@ learning_config:
   test_dataset_config:
     use_tf: True
     data_paths:
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/test-clean/transcripts.tsv
-    tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
+      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/test-clean/transcripts.tsv
+    tfrecords_dir: null
     shuffle: False
     cache: True
     buffer_size: 100
@@ -91,19 +89,14 @@ learning_config:
   running_config:
     batch_size: 4
     num_epochs: 20
-    accumulation_steps: 8
-    outdir: /mnt/Miscellanea/Models/local/deepspeech2
-    log_interval_steps: 400
-    save_interval_steps: 400
-    eval_interval_steps: 800
     checkpoint:
-      filepath: /mnt/Miscellanea/Models/local/deepspeech2/checkpoints/{epoch:02d}.h5
+      filepath: /mnt/e/Models/local/deepspeech2/checkpoints/{epoch:02d}.h5
       save_best_only: True
-      save_weights_only: False
+      save_weights_only: True
       save_freq: epoch
-    states_dir: /mnt/Miscellanea/Models/local/deepspeech2/states
+    states_dir: /mnt/e/Models/local/deepspeech2/states
     tensorboard:
-      log_dir: /mnt/Miscellanea/Models/local/deepspeech2/tensorboard
+      log_dir: /mnt/e/Models/local/deepspeech2/tensorboard
       histogram_freq: 1
       write_graph: True
       write_images: True
diff --git a/examples/jasper/config.yml b/examples/jasper/config.yml
@@ -59,8 +59,8 @@ learning_config:
   train_dataset_config:
     use_tf: True
     data_paths:
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/train-clean-100/transcripts.tsv
-    tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
+      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/train-clean-100/transcripts.tsv
+    tfrecords_dir: null
     shuffle: True
     cache: True
     buffer_size: 100
@@ -69,10 +69,8 @@ learning_config:
 
   eval_dataset_config:
     use_tf: True
-    data_paths:
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-clean/transcripts.tsv
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-other/transcripts.tsv
-    tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
+    data_paths: null
+    tfrecords_dir: null
     shuffle: False
     cache: True
     buffer_size: 100
@@ -82,8 +80,8 @@ learning_config:
   test_dataset_config:
     use_tf: True
     data_paths:
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/test-clean/transcripts.tsv
-    tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
+      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/test-clean/transcripts.tsv
+    tfrecords_dir: null
     shuffle: False
     cache: True
     buffer_size: 100
@@ -98,19 +96,14 @@ learning_config:
   running_config:
     batch_size: 4
     num_epochs: 20
-    accumulation_steps: 8
-    outdir: /mnt/Miscellanea/Models/local/jasper
-    log_interval_steps: 400
-    save_interval_steps: 400
-    eval_interval_steps: 800
     checkpoint:
-      filepath: /mnt/Miscellanea/Models/local/jasper/checkpoints/{epoch:02d}.h5
+      filepath: /mnt/e/Models/local/jasper/checkpoints/{epoch:02d}.h5
       save_best_only: True
-      save_weights_only: False
+      save_weights_only: True
       save_freq: epoch
-    states_dir: /mnt/Miscellanea/Models/local/jasper/states
+    states_dir: /mnt/e/Models/local/jasper/states
     tensorboard:
-      log_dir: /mnt/Miscellanea/Models/local/jasper/tensorboard
+      log_dir: /mnt/e/Models/local/jasper/tensorboard
       histogram_freq: 1
       write_graph: True
       write_images: True
diff --git a/examples/rnn_transducer/config.yml b/examples/rnn_transducer/config.yml
@@ -64,8 +64,8 @@ learning_config:
           num_masks: 1
           mask_factor: 27
     data_paths:
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/train-clean-100/transcripts.tsv
-    tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
+      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/train-clean-100/transcripts.tsv
+    tfrecords_dir: null
     shuffle: True
     cache: True
     buffer_size: 100
@@ -74,10 +74,8 @@ learning_config:
 
   eval_dataset_config:
     use_tf: True
-    data_paths:
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-clean/transcripts.tsv
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-other/transcripts.tsv
-    tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
+    data_paths: null
+    tfrecords_dir: null
     shuffle: False
     cache: True
     buffer_size: 100
@@ -87,8 +85,8 @@ learning_config:
   test_dataset_config:
     use_tf: True
     data_paths:
-      - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/test-clean/transcripts.tsv
-    tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords
+      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/test-clean/transcripts.tsv
+    tfrecords_dir: null
     shuffle: False
     cache: True
     buffer_size: 100
@@ -102,20 +100,15 @@ learning_config:
 
   running_config:
     batch_size: 2
-    accumulation_steps: 1
     num_epochs: 20
-    outdir: /mnt/Miscellanea/Models/local/streaming_transducer
-    log_interval_steps: 300
-    eval_interval_steps: 500
-    save_interval_steps: 1000
     checkpoint:
-      filepath: /mnt/Miscellanea/Models/local/streaming_transducer/checkpoints/{epoch:02d}.h5
+      filepath: /mnt/e/Models/local/rnn_transducer/checkpoints/{epoch:02d}.h5
       save_best_only: True
-      save_weights_only: False
+      save_weights_only: True
       save_freq: epoch
-    states_dir: /mnt/Miscellanea/Models/local/streaming_transducer/states
+    states_dir: /mnt/e/Models/local/rnn_transducer/states
     tensorboard:
-      log_dir: /mnt/Miscellanea/Models/local/streaming_transducer/tensorboard
+      log_dir: /mnt/e/Models/local/rnn_transducer/tensorboard
       histogram_freq: 1
       write_graph: True
       write_images: True
diff --git a/examples/rnn_transducer/train.py b/examples/rnn_transducer/train.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import os
-import math
 import argparse
 from tensorflow_asr.utils import env_util
 
@@ -58,7 +57,6 @@
 from tensorflow_asr.datasets import asr_dataset
 from tensorflow_asr.featurizers import speech_featurizers, text_featurizers
 from tensorflow_asr.models.transducer.rnn_transducer import RnnTransducer
-from tensorflow_asr.optimizers.schedules import TransformerSchedule
 
 config = Config(args.config)
 speech_featurizer = speech_featurizers.TFSpeechFeaturizer(config.speech_config)
@@ -118,18 +116,8 @@
     rnn_transducer = RnnTransducer(**config.model_config, vocabulary_size=text_featurizer.num_classes)
     rnn_transducer._build(speech_featurizer.shape)
     rnn_transducer.summary(line_length=100)
-
-    optimizer = tf.keras.optimizers.Adam(
-        TransformerSchedule(
-            d_model=rnn_transducer.dmodel,
-            warmup_steps=config.learning_config.optimizer_config.pop("warmup_steps", 10000),
-            max_lr=(0.05 / math.sqrt(rnn_transducer.dmodel))
-        ),
-        **config.learning_config.optimizer_config
-    )
-
     rnn_transducer.compile(
-        optimizer=optimizer,
+        optimizer=config.learning_config.optimizer_config,
         experimental_steps_per_execution=args.spx,
         global_batch_size=global_batch_size,
         blank=text_featurizer.blank
diff --git a/tensorflow_asr/models/ctc/jasper.py b/tensorflow_asr/models/ctc/jasper.py
@@ -357,7 +357,7 @@ def __init__(self,
                 strides=1, padding="same",
                 kernel_regularizer=kernel_regularizer,
                 bias_regularizer=bias_regularizer,
-                name=f"{self.name}_logits"
+                name=f"{name}_logits"
             ),
             vocabulary_size=vocabulary_size,
             name=name,
diff --git a/tensorflow_asr/utils/data_util.py b/tensorflow_asr/utils/data_util.py
@@ -21,12 +21,15 @@ def create_inputs(inputs: tf.Tensor,
                   inputs_length: tf.Tensor,
                   predictions: tf.Tensor = None,
                   predictions_length: tf.Tensor = None) -> dict:
-    return {
+    data = {
         "inputs": inputs,
         "inputs_length": inputs_length,
-        "predictions": predictions,
-        "predictions_length": predictions_length
     }
+    if predictions is not None:
+        data["predictions"] = predictions
+    if predictions_length is not None:
+        data["predictions_length"] = predictions_length
+    return data
 
 
 def create_logits(logits: tf.Tensor, logits_length: tf.Tensor) -> dict:
diff --git a/tensorflow_asr/utils/env_util.py b/tensorflow_asr/utils/env_util.py
@@ -49,6 +49,8 @@ def setup_strategy(devices):
         tf.distribute.Strategy: MirroredStrategy for training one or multiple gpus
     """
     setup_devices(devices)
+    if has_tpu():
+        return setup_tpu()
     return tf.distribute.MirroredStrategy()