fix(model): incorrect metrics (track custom metrics in `_tfasr_metrics` and expose them via the `metrics` property)

nglehuy · nglehuy · commit 2a40a6ca4aa6 · 2021-10-27T14:30:00.000+07:00
diff --git a/README.md b/README.md
@@ -38,8 +38,8 @@ TensorFlowASR implements some automatic speech recognition architectures such as
   - [Baselines](#baselines)
   - [Publications](#publications)
 - [Installation](#installation)
+  - [Installing from source (recommended)](#installing-from-source-recommended)
   - [Installing via PyPi](#installing-via-pypi)
-  - [Installing from source](#installing-from-source)
   - [Running in a container](#running-in-a-container)
 - [Setup training and testing](#setup-training-and-testing)
 - [TFLite Convertion](#tflite-convertion)
@@ -59,42 +59,33 @@ TensorFlowASR implements some automatic speech recognition architectures such as
 
 ### Baselines
 
-- **CTCModel** (End2end models using CTC Loss for training, currently supported DeepSpeech2, Jasper)
 - **Transducer Models** (End2end models using RNNT Loss for training, currently supported Conformer, ContextNet, Streaming Transducer)
+- **CTCModel** (End2end models using CTC Loss for training, currently supported DeepSpeech2, Jasper)
 
 ### Publications
 
-- **Deep Speech 2** (Reference: [https://arxiv.org/abs/1512.02595](https://arxiv.org/abs/1512.02595))
-  See [examples/deepspeech2](./examples/deepspeech2)
-- **Jasper** (Reference: [https://arxiv.org/abs/1904.03288](https://arxiv.org/abs/1904.03288))
-  See [examples/jasper](./examples/jasper)
 - **Conformer Transducer** (Reference: [https://arxiv.org/abs/2005.08100](https://arxiv.org/abs/2005.08100))
   See [examples/conformer](./examples/conformer)
 - **Streaming Transducer** (Reference: [https://arxiv.org/abs/1811.06621](https://arxiv.org/abs/1811.06621))
   See [examples/streaming_transducer](./examples/streaming_transducer)
 - **ContextNet** (Reference: [http://arxiv.org/abs/2005.03191](http://arxiv.org/abs/2005.03191))
   See [examples/contextnet](./examples/contextnet)
+- **Deep Speech 2** (Reference: [https://arxiv.org/abs/1512.02595](https://arxiv.org/abs/1512.02595))
+  See [examples/deepspeech2](./examples/deepspeech2)
+- **Jasper** (Reference: [https://arxiv.org/abs/1904.03288](https://arxiv.org/abs/1904.03288))
+  See [examples/jasper](./examples/jasper)
 
 ## Installation
 
 For training and testing, you should use `git clone` for installing necessary packages from other authors (`ctc_decoders`, `rnnt_loss`, etc.)
 
-### Installing via PyPi
-
-For tensorflow 2.3.x, run `pip3 install -U 'TensorFlowASR[tf2.3]'` or `pip3 install -U 'TensorFlowASR[tf2.3-gpu]'`
-
-For tensorflow 2.4.x, run `pip3 install -U 'TensorFlowASR[tf2.4]'` or `pip3 install -U 'TensorFlowASR[tf2.4-gpu]'`
-
-For tensorflow 2.5.x, run `pip3 install -U 'TensorFlowASR[tf2.5]'` or `pip3 install -U 'TensorFlowASR[tf2.5-gpu]'`
-
-For tensorflow 2.6.x, run `pip3 install -U 'TensorFlowASR[tf2.6]'` or `pip3 install -U 'TensorFlowASR[tf2.6-gpu]'`
-
-### Installing from source
+### Installing from source (recommended)
 
 ```bash
 git clone https://github.com/TensorSpeech/TensorFlowASR.git
 cd TensorFlowASR
-pip3 install -e '.[tf2.6]' # see other options in setup.py file
+# TensorFlow 2.3 or newer: replace `tf2.x` with your version (e.g. `tf2.6`)
+pip3 install -e ".[tf2.x]" # or ".[tf2.x-gpu]"
 ```
 
 For anaconda3:
@@ -105,9 +96,18 @@ conda activate tfasr
 pip install -U tensorflow-gpu # upgrade to latest version of tensorflow
 git clone https://github.com/TensorSpeech/TensorFlowASR.git
 cd TensorFlowASR
-pip3 install -e '.[tf2.3]' # or '.[tf2.3-gpu]' or '.[tf2.4]' or '.[tf2.4-gpu]' or '.[tf2.5]' or '.[tf2.5-gpu]' 
+# TensorFlow 2.3 or newer: replace `tf2.x` with your version (e.g. `tf2.6`)
+pip3 install -e ".[tf2.x]" # or ".[tf2.x-gpu]"
 ```
 
+### Installing via PyPi
+
+```bash
+# TensorFlow 2.3 or newer: replace `tf2.x` with your version (e.g. `tf2.6`)
+pip3 install -U "TensorFlowASR[tf2.x]" # or pip3 install -U "TensorFlowASR[tf2.x-gpu]"
+```
+
+
 ### Running in a container
 
 ```bash
diff --git a/examples/conformer/config.yml b/examples/conformer/config.yml
@@ -31,7 +31,7 @@ decoder_config:
   beam_width: 0
   norm_score: True
   corpus_files:
-    - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/train-clean-100/transcripts.tsv
+    - /mnt/Data/MLDL/Datasets/ASR/Raw/LibriSpeech/train-clean-100/transcripts.tsv
 
 model_config:
   name: conformer
@@ -75,8 +75,8 @@ learning_config:
           num_masks: 1
           mask_factor: 27
     data_paths:
-      - /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/train-clean-100/transcripts.tsv
-    tfrecords_dir: /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/tfrecords_1030
+      - /mnt/Data/MLDL/Datasets/ASR/Raw/LibriSpeech/train-clean-100/transcripts.tsv
+    tfrecords_dir: /mnt/Data/MLDL/Datasets/ASR/Raw/LibriSpeech/tfrecords_1030
     shuffle: True
     cache: True
     buffer_size: 100
@@ -86,7 +86,7 @@ learning_config:
   eval_dataset_config:
     use_tf: True
     data_paths: null
-    tfrecords_dir: /mnt/h/ML/Datasets/ASR/Raw/LibriSpeech/tfrecords_1030
+    tfrecords_dir: /mnt/Data/MLDL/Datasets/ASR/Raw/LibriSpeech/tfrecords_1030
     shuffle: False
     cache: True
     buffer_size: 100
@@ -113,13 +113,13 @@ learning_config:
     batch_size: 2
     num_epochs: 50
     checkpoint:
-      filepath: /mnt/e/Models/local/conformer/checkpoints/{epoch:02d}.h5
+      filepath: /mnt/Miscellanea/Models/local/conformer/checkpoints/{epoch:02d}.h5
       save_best_only: True
       save_weights_only: True
       save_freq: epoch
-    states_dir: /mnt/e/Models/local/conformer/states
+    states_dir: /mnt/Miscellanea/Models/local/conformer/states
     tensorboard:
-      log_dir: /mnt/e/Models/local/conformer/tensorboard
+      log_dir: /mnt/Miscellanea/Models/local/conformer/tensorboard
       histogram_freq: 1
       write_graph: True
       write_images: True
diff --git a/examples/conformer/saved_model.py b/examples/conformer/saved_model.py
@@ -27,38 +27,23 @@
 parser = argparse.ArgumentParser(prog="Conformer Testing")
 
 parser.add_argument(
-    "--config",
-    type=str,
-    default=DEFAULT_YAML,
-    help="The file path of model configuration file",
+    "--config", type=str, default=DEFAULT_YAML, help="The file path of model configuration file",
 )
 
 parser.add_argument(
-    "--h5",
-    type=str,
-    default=None,
-    help="Path to saved h5 weights",
+    "--h5", type=str, default=None, help="Path to saved h5 weights",
 )
 
 parser.add_argument(
-    "--sentence_piece",
-    default=False,
-    action="store_true",
-    help="Whether to use `SentencePiece` model",
+    "--sentence_piece", default=False, action="store_true", help="Whether to use `SentencePiece` model",
 )
 
 parser.add_argument(
-    "--subwords",
-    default=False,
-    action="store_true",
-    help="Use subwords",
+    "--subwords", default=False, action="store_true", help="Use subwords",
 )
 
 parser.add_argument(
-    "--output_dir",
-    type=str,
-    default=None,
-    help="Output directory for saved model",
+    "--output_dir", type=str, default=None, help="Output directory for saved model",
 )
 
 args = parser.parse_args()
diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,7 @@
 SoundFile==0.10.3.post1
 tensorflow_datasets==4.4.0
 nltk==3.6.4
-numpy==1.19.5
+numpy>=1.19.5
 sentencepiece==0.1.96
 tqdm==4.62.1
 librosa==0.8.1
diff --git a/setup.py b/setup.py
@@ -30,7 +30,7 @@
 
 setuptools.setup(
     name="TensorFlowASR",
-    version="1.0.3",
+    version="1.0.2",
     author="Huy Le Nguyen",
     author_email="nlhuy.cs.16@gmail.com",
     description="Almost State-of-the-art Automatic Speech Recognition using Tensorflow 2",
diff --git a/tensorflow_asr/models/base_model.py b/tensorflow_asr/models/base_model.py
@@ -41,50 +41,42 @@ def save(
             )
 
     def save_weights(
-        self,
-        filepath,
-        overwrite=True,
-        save_format=None,
-        options=None,
+        self, filepath, overwrite=True, save_format=None, options=None,
     ):
         with file_util.save_file(filepath) as path:
             super().save_weights(filepath=path, overwrite=overwrite, save_format=save_format, options=options)
 
     def load_weights(
-        self,
-        filepath,
-        by_name=False,
-        skip_mismatch=False,
-        options=None,
+        self, filepath, by_name=False, skip_mismatch=False, options=None,
     ):
         with file_util.read_file(filepath) as path:
             super().load_weights(filepath=path, by_name=by_name, skip_mismatch=skip_mismatch, options=options)
 
+    @property
+    def metrics(self):
+        if not hasattr(self, "_tfasr_metrics"):
+            self._tfasr_metrics = {}
+        return list(self._tfasr_metrics.values())
+
     def add_metric(
-        self,
-        metric: tf.keras.metrics.Metric,
+        self, metric: tf.keras.metrics.Metric,
     ):
-        if not hasattr(self, "_metrics"):
-            self._metrics = {}
-        self._metrics[metric.name] = metric
+        if not hasattr(self, "_tfasr_metrics"):
+            self._tfasr_metrics = {}
+        self._tfasr_metrics[metric.name] = metric
 
     def make(self, *args, **kwargs):
         """Custom function for building model (uses self.build so cannot overwrite that function)"""
         raise NotImplementedError()
 
     def compile(
-        self,
-        loss,
-        optimizer,
-        run_eagerly=None,
-        **kwargs,
+        self, loss, optimizer, run_eagerly=None, **kwargs,
     ):
         self.use_loss_scale = False
         if not env_util.has_devices("TPU"):
             optimizer = mxp.experimental.LossScaleOptimizer(tf.keras.optimizers.get(optimizer), "dynamic")
             self.use_loss_scale = True
-        loss_metric = tf.keras.metrics.Mean(name="loss", dtype=tf.float32)
-        self.add_metric(loss_metric)
+        self.add_metric(metric=tf.keras.metrics.Mean(name="loss", dtype=tf.float32))
         super().compile(optimizer=optimizer, loss=loss, run_eagerly=run_eagerly, **kwargs)
 
     # -------------------------------- STEP FUNCTIONS -------------------------------------
@@ -110,8 +102,8 @@ def train_step(self, batch):
         else:
             gradients = tape.gradient(loss, self.trainable_weights)
         self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
-        self._metrics["loss"].update_state(loss)
-        return {m.name: m.result() for m in self._metrics.values()}
+        self._tfasr_metrics["loss"].update_state(loss)
+        return {m.name: m.result() for m in self.metrics}
 
     def test_step(self, batch):
         """
@@ -125,8 +117,8 @@ def test_step(self, batch):
         inputs, y_true = batch
         y_pred = self(inputs, training=False)
         loss = self.loss(y_true, y_pred)
-        self._metrics["loss"].update_state(loss)
-        return {m.name: m.result() for m in self._metrics.values()}
+        self._tfasr_metrics["loss"].update_state(loss)
+        return {m.name: m.result() for m in self.metrics}
 
     def predict_step(self, batch):
         """