🚀 fixed get total steps and update summary

nglehuy · nglehuy · commit c2e1ddf24326 · 2020-12-24T23:35:59.000+07:00
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@
 </h2>
 
 <p align="center">
-TensorFlowASR implements some automatic speech recognition architectures such as DeepSpeech2, Jasper, ContextNet, Conformer, etc. These models can be converted to TFLite to reduce memory and computation for deployment :smile:
+TensorFlowASR implements some automatic speech recognition architectures such as DeepSpeech2, Jasper, RNN Transducer, ContextNet, Conformer, etc. These models can be converted to TFLite to reduce memory and computation for deployment :smile:
 </p>
 
 ## What's New?
@@ -95,7 +95,7 @@ python3 setup.py install
 For anaconda3:
 
 ```bash
-conda create -y -n tfasr tensorflow-gpu python=3.7 # tensorflow if using CPU
+conda create -y -n tfasr tensorflow-gpu python=3.8 # tensorflow if using CPU
 conda activate tfasr
 pip install -U tensorflow-gpu # upgrade to latest version of tensorflow
 git clone https://github.com/TensorSpeech/TensorFlowASR.git
diff --git a/examples/deepspeech2/README.md b/examples/deepspeech2/README.md
@@ -27,6 +27,6 @@ model_config:
 
 ## Training and Testing
 
-See `python examples/deepspeech2/train_ds2.py --help`
+See `python examples/deepspeech2/train_*.py --help`
 
-See `python examples/deepspeech2/test_ds2.py --help`
+See `python examples/deepspeech2/test_*.py --help`
diff --git a/examples/jasper/README.md b/examples/jasper/README.md
@@ -35,6 +35,6 @@ model_config:
 
 ## Training and Testing
 
-See `python examples/jasper/train_jasper.py --help`
+See `python examples/jasper/train_*.py --help`
 
-See `python examples/jasper/test_jasper.py --help`
+See `python examples/jasper/test_*.py --help`
diff --git a/examples/streaming_transducer/README.md b/examples/streaming_transducer/README.md
@@ -26,19 +26,21 @@ decoder_config:
 
 model_config:
   name: streaming_transducer
-  reduction_factor: 2
-  reduction_positions: [1]
-  encoder_dim: 320
-  encoder_units: 1024
-  encoder_layers: 7
+  encoder_reductions:
+    0: 3
+    1: 2
+  encoder_dmodel: 320
+  encoder_rnn_type: lstm
+  encoder_rnn_units: 1024
+  encoder_nlayers: 8
   encoder_layer_norm: True
-  encoder_type: lstm
-  embed_dim: 320
-  embed_dropout: 0.1
-  num_rnns: 1
-  rnn_units: 320
-  rnn_type: lstm
-  layer_norm: True
+  prediction_embed_dim: 320
+  prediction_embed_dropout: 0.0
+  prediction_num_rnns: 2
+  prediction_rnn_units: 1024
+  prediction_rnn_type: lstm
+  prediction_projection_units: 320
+  prediction_layer_norm: True
   joint_dim: 320
 
 learning_config:
@@ -69,8 +71,8 @@ learning_config:
 
 ## Usage
 
-Training, see `python examples/streamingTransducer/train_streaming_transducer.py --help`
+Training, see `python examples/streaming_transducer/train_*.py --help`
 
-Testing, see `python examples/streamingTransducer/train_streaming_transducer.py --help`
+Testing, see `python examples/streaming_transducer/test_*.py --help`
 
-TFLite Conversion, see `python examples/streamingTransducer/tflite_streaming_transducer.py --help`
+TFLite Conversion, see `python examples/streaming_transducer/tflite_*.py --help`
diff --git a/setup.py b/setup.py
@@ -26,9 +26,7 @@
     "soundfile>=0.10.3",
     "PyYAML>=5.3.1",
     "matplotlib>=3.2.1",
-    "numpy>=1.16.0,<1.19.0",
     "sox>=1.3.7",
-    "nltk>=3.5",
     "numba==0.49.1",
     "tqdm>=4.51.0",
     "colorama>=0.4.3",
@@ -37,7 +35,7 @@
 
 setuptools.setup(
     name="TensorFlowASR",
-    version="0.5.3",
+    version="0.5.4",
     author="Huy Le Nguyen",
     author_email="nlhuy.cs.16@gmail.com",
     description="Almost State-of-the-art Automatic Speech Recognition using Tensorflow 2",
diff --git a/tensorflow_asr/models/transducer.py b/tensorflow_asr/models/transducer.py
@@ -242,6 +242,7 @@ def _build(self, input_shape):
         self([inputs, input_length, pred, pred_length], training=False)
 
     def summary(self, line_length=None, **kwargs):
+        if self.encoder is not None: self.encoder.summary(line_length=line_length, **kwargs)
         self.predict_net.summary(line_length=line_length, **kwargs)
         self.joint_net.summary(line_length=line_length, **kwargs)
         super(Transducer, self).summary(line_length=line_length, **kwargs)
diff --git a/tensorflow_asr/utils/utils.py b/tensorflow_asr/utils/utils.py
@@ -63,8 +63,8 @@ def bytes_to_string(array: np.ndarray, encoding: str = "utf-8"):
 
 
 def get_num_batches(samples, batch_size, drop_remainders=True):
-    if drop_remainders:
-        return math.floor(float(samples) / float(batch_size))
+    if samples is None or batch_size is None: return None
+    if drop_remainders: return math.floor(float(samples) / float(batch_size))
     return math.ceil(float(samples) / float(batch_size))