TensorSpeech
diff --git a/‎examples/contextnet/config.yml‎
Lines changed: 14 additions & 1 deletion b/‎examples/contextnet/config.yml‎
Lines changed: 14 additions & 1 deletion
diff --git a/‎examples/contextnet/train_keras_subword_contextnet.py‎
Lines changed: 151 additions & 0 deletions b/‎examples/contextnet/train_keras_subword_contextnet.py‎
Lines changed: 151 additions & 0 deletions
diff --git a/‎examples/deepspeech2/config.yml‎
Lines changed: 14 additions & 1 deletion b/‎examples/deepspeech2/config.yml‎
Lines changed: 14 additions & 1 deletion
diff --git a/‎examples/deepspeech2/train_keras_ds2.py‎
Lines changed: 124 additions & 0 deletions b/‎examples/deepspeech2/train_keras_ds2.py‎
Lines changed: 124 additions & 0 deletions
diff --git a/‎examples/jasper/config.yml‎
Lines changed: 13 additions & 0 deletions b/‎examples/jasper/config.yml‎
Lines changed: 13 additions & 0 deletions
@@ -213,7 +213,7 @@ learning_config:
       - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-other/transcripts.tsv
     test_paths:
       - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/test-clean/transcripts.tsv
-    tfrecords_dir: null
+    tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords  # read by train_keras_subword_contextnet.py when run with --tfrecords
 
   optimizer_config:
     warmup_steps: 40000
@@ -229,3 +229,16 @@ learning_config:
     log_interval_steps: 300
     eval_interval_steps: 500
     save_interval_steps: 1000
+    checkpoint:  # expanded as keyword arguments of tf.keras.callbacks.ModelCheckpoint by train_keras_subword_contextnet.py
+      filepath: /mnt/Miscellanea/Models/local/contextnet/checkpoints/{epoch:02d}.h5  # {epoch:02d} is formatted by ModelCheckpoint
+      save_best_only: True
+      save_weights_only: False  # NOTE(review): full-model HDF5 saving needs a Functional/Sequential model - confirm ContextNet qualifies
+      save_freq: epoch
+    states_dir: /mnt/Miscellanea/Models/local/contextnet/states  # passed to tf.keras.callbacks.experimental.BackupAndRestore
+    tensorboard:  # expanded as keyword arguments of tf.keras.callbacks.TensorBoard
+      log_dir: /mnt/Miscellanea/Models/local/contextnet/tensorboard
+      histogram_freq: 1
+      write_graph: True
+      write_images: True
+      update_freq: 'epoch'
+      profile_batch: 2
@@ -0,0 +1,151 @@
+# Copyright 2020 Huy Le Nguyen (@usimarit)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import math
+import argparse
+from tensorflow_asr.utils import setup_environment, setup_strategy
+
+setup_environment()  # runs before tensorflow is imported; presumably sets TF env/logging options - verify in tensorflow_asr.utils
+import tensorflow as tf
+
+DEFAULT_YAML = os.path.join(os.path.abspath(os.path.dirname(__file__)), "config.yml")  # config.yml located next to this script
+
+tf.keras.backend.clear_session()  # reset Keras global state before anything is built
+
+parser = argparse.ArgumentParser(prog="ContextNet Training")  # command-line options of this training script
+
+parser.add_argument("--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file")
+
+parser.add_argument("--max_ckpts", type=int, default=10, help="Max number of checkpoints to keep")  # NOTE(review): never read in this script
+
+parser.add_argument("--tfrecords", default=False, action="store_true", help="Whether to use tfrecords")
+
+parser.add_argument("--tfrecords_shards", type=int, default=16, help="Number of tfrecords shards")
+
+parser.add_argument("--tbs", type=int, default=None, help="Train batch size per replica")  # NOTE(review): never read; batch size comes from the config
+
+parser.add_argument("--ebs", type=int, default=None, help="Evaluation batch size per replica")  # NOTE(review): never read in this script
+
+parser.add_argument("--devices", type=int, nargs="*", default=[0], help="Devices' ids to apply distributed training")
+
+parser.add_argument("--mxp", default=False, action="store_true", help="Enable mixed precision")
+
+parser.add_argument("--cache", default=False, action="store_true", help="Enable caching for dataset")
+
+parser.add_argument("--subwords", type=str, default=None, help="Path to file that stores generated subwords")
+
+parser.add_argument("--subwords_corpus", nargs="*", type=str, default=[], help="Transcript files for generating subwords")
+
+parser.add_argument("--bfs", type=int, default=100, help="Buffer size for shuffling")
+
+args = parser.parse_args()
+
+tf.config.optimizer.set_experimental_options({"auto_mixed_precision": args.mxp})  # graph-optimizer mixed precision follows --mxp
+
+strategy = setup_strategy(args.devices)  # built from the --devices ids; used below for strategy.scope() and num_replicas_in_sync
+
+from tensorflow_asr.configs.config import Config
+from tensorflow_asr.datasets.keras import ASRTFRecordDatasetKeras, ASRSliceDatasetKeras
+from tensorflow_asr.featurizers.speech_featurizers import TFSpeechFeaturizer
+from tensorflow_asr.featurizers.text_featurizers import SubwordFeaturizer
+from tensorflow_asr.models.keras.contextnet import ContextNet
+from tensorflow_asr.optimizers.schedules import TransformerSchedule
+
+config = Config(args.config)  # configuration loaded from the file given by --config
+speech_featurizer = TFSpeechFeaturizer(config.speech_config)
+
+if args.subwords and os.path.exists(args.subwords):  # an existing subwords file is reused as-is
+    print("Loading subwords ...")
+    text_featurizer = SubwordFeaturizer.load_from_file(config.decoder_config, args.subwords)
+else:
+    print("Generating subwords ...")
+    text_featurizer = SubwordFeaturizer.build_from_corpus(
+        config.decoder_config, corpus_files=args.subwords_corpus
+    )
+    if args.subwords:  # --subwords defaults to None: only persist when a path was actually given
+        text_featurizer.save_to_file(args.subwords)
+
+if args.tfrecords:  # --tfrecords: TFRecord datasets using dataset_config.tfrecords_dir
+    train_dataset = ASRTFRecordDatasetKeras(
+        data_paths=config.learning_config.dataset_config.train_paths,
+        tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir,
+        speech_featurizer=speech_featurizer,
+        text_featurizer=text_featurizer,
+        augmentations=config.learning_config.augmentations,  # augmentations are passed to the training set only
+        tfrecords_shards=args.tfrecords_shards,
+        stage="train", cache=args.cache,
+        shuffle=True, buffer_size=args.bfs,
+    )
+    eval_dataset = ASRTFRecordDatasetKeras(
+        data_paths=config.learning_config.dataset_config.eval_paths,
+        tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir,
+        tfrecords_shards=args.tfrecords_shards,
+        speech_featurizer=speech_featurizer,
+        text_featurizer=text_featurizer,
+        stage="eval", cache=args.cache,
+        shuffle=True, buffer_size=args.bfs,  # NOTE(review): eval data is shuffled as well - confirm this is intended
+    )
+else:  # default: slice datasets constructed from data_paths only (no tfrecords_dir)
+    train_dataset = ASRSliceDatasetKeras(
+        data_paths=config.learning_config.dataset_config.train_paths,
+        speech_featurizer=speech_featurizer,
+        text_featurizer=text_featurizer,
+        augmentations=config.learning_config.augmentations,  # augmentations are passed to the training set only
+        stage="train", cache=args.cache,
+        shuffle=True, buffer_size=args.bfs,
+    )
+    eval_dataset = ASRSliceDatasetKeras(
+        data_paths=config.learning_config.dataset_config.eval_paths,
+        speech_featurizer=speech_featurizer,
+        text_featurizer=text_featurizer,
+        stage="eval", cache=args.cache,
+        shuffle=True, buffer_size=args.bfs,  # NOTE(review): eval data is shuffled as well - confirm this is intended
+    )
+
+with strategy.scope():  # model, optimizer, data loaders and fit() are all created inside the strategy scope
+    global_batch_size = config.learning_config.running_config.batch_size  # batch size taken from the config ...
+    global_batch_size *= strategy.num_replicas_in_sync  # ... multiplied by the number of replicas
+    # build model
+    contextnet = ContextNet(**config.model_config, vocabulary_size=text_featurizer.num_classes)
+    contextnet._build(speech_featurizer.shape)
+    contextnet.summary(line_length=120)
+
+    optimizer = tf.keras.optimizers.Adam(
+        TransformerSchedule(
+            d_model=contextnet.dmodel,
+            warmup_steps=config.learning_config.optimizer_config["warmup_steps"],
+            max_lr=(0.05 / math.sqrt(contextnet.dmodel))  # max_lr = 0.05 / sqrt(dmodel)
+        ),
+        beta_1=config.learning_config.optimizer_config["beta1"],
+        beta_2=config.learning_config.optimizer_config["beta2"],
+        epsilon=config.learning_config.optimizer_config["epsilon"]
+    )
+
+    contextnet.compile(optimizer=optimizer, global_batch_size=global_batch_size, blank=text_featurizer.blank)
+
+    train_data_loader = train_dataset.create(global_batch_size)
+    eval_data_loader = eval_dataset.create(global_batch_size)  # evaluation uses the same global batch size as training
+
+    callbacks = [
+        tf.keras.callbacks.ModelCheckpoint(**config.learning_config.running_config.checkpoint),  # kwargs from the `checkpoint` config section
+        tf.keras.callbacks.experimental.BackupAndRestore(config.learning_config.running_config.states_dir),  # backup dir = `states_dir`
+        tf.keras.callbacks.TensorBoard(**config.learning_config.running_config.tensorboard)  # kwargs from the `tensorboard` config section
+    ]
+
+    contextnet.fit(
+        train_data_loader, epochs=config.learning_config.running_config.num_epochs,
+        validation_data=eval_data_loader, callbacks=callbacks,
+        steps_per_epoch=train_dataset.total_steps  # epoch length is taken from the training dataset object
+    )
@@ -59,7 +59,7 @@ learning_config:
       - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/dev-other/transcripts.tsv
     test_paths:
       - /mnt/Miscellanea/Datasets/Speech/LibriSpeech/test-clean/transcripts.tsv
-    tfrecords_dir: null
+    tfrecords_dir: /mnt/Miscellanea/Datasets/Speech/LibriSpeech/tfrecords  # read by train_keras_ds2.py when run with --tfrecords
 
   optimizer_config:
     class_name: adam
@@ -74,3 +74,16 @@ learning_config:
     log_interval_steps: 400
     save_interval_steps: 400
     eval_interval_steps: 800
+    checkpoint:  # expanded as keyword arguments of tf.keras.callbacks.ModelCheckpoint by train_keras_ds2.py
+      filepath: /mnt/Miscellanea/Models/local/deepspeech2/checkpoints/{epoch:02d}.h5  # {epoch:02d} is formatted by ModelCheckpoint
+      save_best_only: True
+      save_weights_only: False  # NOTE(review): full-model HDF5 saving needs a Functional/Sequential model - confirm DeepSpeech2 qualifies
+      save_freq: epoch
+    states_dir: /mnt/Miscellanea/Models/local/deepspeech2/states  # passed to tf.keras.callbacks.experimental.BackupAndRestore
+    tensorboard:  # expanded as keyword arguments of tf.keras.callbacks.TensorBoard
+      log_dir: /mnt/Miscellanea/Models/local/deepspeech2/tensorboard
+      histogram_freq: 1
+      write_graph: True
+      write_images: True
+      update_freq: 'epoch'
+      profile_batch: 2
@@ -0,0 +1,124 @@
+# Copyright 2020 Huy Le Nguyen (@usimarit)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import argparse
+from tensorflow_asr.utils import setup_environment, setup_strategy
+
+setup_environment()  # runs before tensorflow is imported; presumably sets TF env/logging options - verify in tensorflow_asr.utils
+import tensorflow as tf
+
+DEFAULT_YAML = os.path.join(os.path.abspath(os.path.dirname(__file__)), "config.yml")  # config.yml located next to this script
+
+tf.keras.backend.clear_session()  # reset Keras global state before anything is built
+
+parser = argparse.ArgumentParser(prog="Deep Speech 2 Training")  # command-line options of this training script
+
+parser.add_argument("--config", "-c", type=str, default=DEFAULT_YAML,
+                    help="The file path of model configuration file")
+
+parser.add_argument("--max_ckpts", type=int, default=10,
+                    help="Max number of checkpoints to keep")  # NOTE(review): never read in this script
+
+parser.add_argument("--tbs", type=int, default=None,
+                    help="Train batch size per replicas")  # NOTE(review): never read; batch size comes from the config
+
+parser.add_argument("--ebs", type=int, default=None,
+                    help="Evaluation batch size per replicas")  # NOTE(review): never read in this script
+
+parser.add_argument("--tfrecords", default=False, action="store_true",
+                    help="Whether to use tfrecords dataset")
+
+parser.add_argument("--devices", type=int, nargs="*", default=[0],
+                    help="Devices' ids to apply distributed training")
+
+parser.add_argument("--mxp", default=False, action="store_true",
+                    help="Enable mixed precision")
+
+parser.add_argument("--cache", default=False, action="store_true",
+                    help="Enable caching for dataset")
+
+args = parser.parse_args()
+
+tf.config.optimizer.set_experimental_options({"auto_mixed_precision": args.mxp})  # graph-optimizer mixed precision follows --mxp
+
+strategy = setup_strategy(args.devices)  # built from the --devices ids; used below for strategy.scope() and num_replicas_in_sync
+
+from tensorflow_asr.configs.config import Config
+from tensorflow_asr.datasets.keras import ASRTFRecordDatasetKeras, ASRSliceDatasetKeras
+from tensorflow_asr.featurizers.speech_featurizers import TFSpeechFeaturizer
+from tensorflow_asr.featurizers.text_featurizers import CharFeaturizer
+from tensorflow_asr.models.keras.deepspeech2 import DeepSpeech2
+
+config = Config(args.config)  # configuration loaded from the file given by --config
+speech_featurizer = TFSpeechFeaturizer(config.speech_config)
+text_featurizer = CharFeaturizer(config.decoder_config)  # character-level featurizer; no subword file is needed here
+
+if args.tfrecords:  # --tfrecords: TFRecord datasets using dataset_config.tfrecords_dir
+    train_dataset = ASRTFRecordDatasetKeras(
+        data_paths=config.learning_config.dataset_config.train_paths,
+        tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir,
+        speech_featurizer=speech_featurizer,
+        text_featurizer=text_featurizer,
+        augmentations=config.learning_config.augmentations,  # augmentations are passed to the training set only
+        stage="train", cache=args.cache, shuffle=True
+    )
+    eval_dataset = ASRTFRecordDatasetKeras(
+        data_paths=config.learning_config.dataset_config.eval_paths,
+        tfrecords_dir=config.learning_config.dataset_config.tfrecords_dir,
+        speech_featurizer=speech_featurizer,
+        text_featurizer=text_featurizer,
+        stage="eval", cache=args.cache, shuffle=True  # NOTE(review): eval data is shuffled as well - confirm this is intended
+    )
+else:  # default: slice datasets constructed from data_paths only (no tfrecords_dir)
+    train_dataset = ASRSliceDatasetKeras(
+        speech_featurizer=speech_featurizer,
+        text_featurizer=text_featurizer,
+        data_paths=config.learning_config.dataset_config.train_paths,
+        augmentations=config.learning_config.augmentations,  # augmentations are passed to the training set only
+        stage="train", cache=args.cache, shuffle=True
+    )
+    eval_dataset = ASRSliceDatasetKeras(
+        speech_featurizer=speech_featurizer,
+        text_featurizer=text_featurizer,
+        data_paths=config.learning_config.dataset_config.eval_paths,
+        stage="eval", cache=args.cache, shuffle=True  # NOTE(review): eval data is shuffled as well - confirm this is intended
+    )
+
+# Build, compile and train the DS2 model inside the strategy scope
+with strategy.scope():
+    global_batch_size = config.learning_config.running_config.batch_size  # batch size taken from the config ...
+    global_batch_size *= strategy.num_replicas_in_sync  # ... multiplied by the number of replicas
+
+    ds2_model = DeepSpeech2(**config.model_config, vocabulary_size=text_featurizer.num_classes)
+    ds2_model._build(speech_featurizer.shape)
+    ds2_model.summary(line_length=120)
+
+    ds2_model.compile(optimizer=config.learning_config.optimizer_config,  # the optimizer_config section itself is passed as the optimizer
+                      global_batch_size=global_batch_size, blank=text_featurizer.blank)
+
+    train_data_loader = train_dataset.create(global_batch_size)
+    eval_data_loader = eval_dataset.create(global_batch_size)  # evaluation uses the same global batch size as training
+
+    callbacks = [
+        tf.keras.callbacks.ModelCheckpoint(**config.learning_config.running_config.checkpoint),  # kwargs from the `checkpoint` config section
+        tf.keras.callbacks.experimental.BackupAndRestore(config.learning_config.running_config.states_dir),  # backup dir = `states_dir`
+        tf.keras.callbacks.TensorBoard(**config.learning_config.running_config.tensorboard)  # kwargs from the `tensorboard` config section
+    ]
+
+    ds2_model.fit(
+        train_data_loader, epochs=config.learning_config.running_config.num_epochs,
+        validation_data=eval_data_loader, callbacks=callbacks,
+        steps_per_epoch=train_dataset.total_steps  # epoch length is taken from the training dataset object
+    )
@@ -81,3 +81,16 @@ learning_config:
     log_interval_steps: 400
     save_interval_steps: 400
     eval_interval_steps: 800
+    checkpoint:  # NOTE(review): presumably ModelCheckpoint kwargs, as in the contextnet/deepspeech2 Keras scripts - no Jasper Keras script is visible here
+      filepath: /mnt/Miscellanea/Models/local/jasper/checkpoints/{epoch:02d}.h5
+      save_best_only: True
+      save_weights_only: False
+      save_freq: epoch
+    states_dir: /mnt/Miscellanea/Models/local/jasper/states  # presumably the BackupAndRestore directory - verify against the consuming script
+    tensorboard:  # presumably TensorBoard callback kwargs - verify against the consuming script
+      log_dir: /mnt/Miscellanea/Models/local/jasper/tensorboard
+      histogram_freq: 1
+      write_graph: True
+      write_images: True
+      update_freq: 'epoch'
+      profile_batch: 2