TensorSpeech
diff --git a/examples/conformer/config.yml b/examples/conformer/config.yml
Lines changed: 1 addition & 0 deletions
diff --git a/examples/contextnet/config.yml b/examples/contextnet/config.yml
Lines changed: 1 addition & 0 deletions
diff --git a/examples/deepspeech2/README.md b/examples/deepspeech2/README.md
Lines changed: 2 additions & 1 deletion
diff --git a/examples/jasper/README.md b/examples/jasper/README.md
Lines changed: 2 additions & 1 deletion
diff --git a/examples/streaming_transducer/config.yml b/examples/streaming_transducer/config.yml
Lines changed: 1 addition & 0 deletions
diff --git a/setup.py b/setup.py
Lines changed: 1 addition & 1 deletion
diff --git a/tensorflow_asr/models/conformer.py b/tensorflow_asr/models/conformer.py
Lines changed: 2 additions & 0 deletions
diff --git a/tensorflow_asr/models/contextnet.py b/tensorflow_asr/models/contextnet.py
Lines changed: 3 additions & 1 deletion
diff --git a/tensorflow_asr/models/ctc.py b/tensorflow_asr/models/ctc.py
Lines changed: 3 additions & 2 deletions
diff --git a/tensorflow_asr/models/streaming_transducer.py b/tensorflow_asr/models/streaming_transducer.py
Lines changed: 2 additions & 0 deletions
@@ -56,6 +56,7 @@ model_config:
   prediction_layer_norm: True
   prediction_projection_units: 0
   joint_dim: 320
+  joint_activation: tanh
 
 learning_config:
   augmentations:
 
@@ -192,6 +192,7 @@ model_config:
   prediction_layer_norm: True
   prediction_projection_units: 0
   joint_dim: 640
+  joint_activation: tanh
 
 learning_config:
   augmentations:
 
@@ -29,4 +29,5 @@ model_config:
 
 See `python examples/deepspeech2/train_*.py --help`
 
-See `python examples/deepspeech2/test_*.py --help`
+See `python examples/deepspeech2/test_*.py --help`
+
@@ -37,4 +37,5 @@ model_config:
 
 See `python examples/jasper/train_*.py --help`
 
-See `python examples/jasper/test_*.py --help`
+See `python examples/jasper/test_*.py --help`
+
@@ -49,6 +49,7 @@ model_config:
   prediction_projection_units: 320
   prediction_layer_norm: True
   joint_dim: 320
+  joint_activation: tanh
 
 learning_config:
   augmentations:
 
@@ -33,7 +33,7 @@
 
 setuptools.setup(
     name="TensorFlowASR",
-    version="0.6.0",
+    version="0.6.1",
     author="Huy Le Nguyen",
     author_email="[email protected]",
     description="Almost State-of-the-art Automatic Speech Recognition using Tensorflow 2",
 
@@ -384,6 +384,7 @@ def __init__(self,
                  prediction_layer_norm: bool = True,
                  prediction_projection_units: int = 0,
                  joint_dim: int = 1024,
+                 joint_activation: str = "tanh",
                  kernel_regularizer=L2,
                  bias_regularizer=L2,
                  name: str = "conformer_transducer",
@@ -414,6 +415,7 @@ def __init__(self,
             layer_norm=prediction_layer_norm,
             projection_units=prediction_projection_units,
             joint_dim=joint_dim,
+            joint_activation=joint_activation,
             kernel_regularizer=kernel_regularizer,
             bias_regularizer=bias_regularizer,
             name=name, **kwargs
 
@@ -196,7 +196,7 @@ class ContextNet(Transducer):
     def __init__(self,
                  vocabulary_size: int,
                  encoder_blocks: List[dict],
-                 encoder_alpha: float,
+                 encoder_alpha: float = 0.5,
                  prediction_embed_dim: int = 512,
                  prediction_embed_dropout: int = 0,
                  prediction_num_rnns: int = 1,
@@ -206,6 +206,7 @@ def __init__(self,
                  prediction_layer_norm: bool = True,
                  prediction_projection_units: int = 0,
                  joint_dim: int = 1024,
+                 joint_activation: str = "tanh",
                  kernel_regularizer=L2,
                  bias_regularizer=L2,
                  name: str = "contextnet",
@@ -228,6 +229,7 @@ def __init__(self,
             layer_norm=prediction_layer_norm,
             projection_units=prediction_projection_units,
             joint_dim=joint_dim,
+            joint_activation=joint_activation,
             kernel_regularizer=kernel_regularizer,
             bias_regularizer=bias_regularizer,
             name=name, **kwargs
 
@@ -25,6 +25,7 @@
 class CtcModel(Model):
     def __init__(self, **kwargs):
         super(CtcModel, self).__init__(**kwargs)
+        self.time_reduction_factor = 1
 
     def _build(self, input_shape):
         features = tf.keras.Input(input_shape, dtype=tf.float32)
@@ -67,7 +68,7 @@ def recognize_tflite(self, signal):
         features = self.speech_featurizer.tf_extract(signal)
         features = tf.expand_dims(features, axis=0)
         input_length = shape_list(features)[1]
-        input_length = get_reduced_length(input_length, self.base_model.time_reduction_factor)
+        input_length = get_reduced_length(input_length, self.time_reduction_factor)
         input_length = tf.expand_dims(input_length, axis=0)
         logits = self(features, training=False)
         probs = tf.nn.softmax(logits)
@@ -113,7 +114,7 @@ def recognize_beam_tflite(self, signal):
         features = self.speech_featurizer.tf_extract(signal)
         features = tf.expand_dims(features, axis=0)
         input_length = shape_list(features)[1]
-        input_length = get_reduced_length(input_length, self.base_model.time_reduction_factor)
+        input_length = get_reduced_length(input_length, self.time_reduction_factor)
         input_length = tf.expand_dims(input_length, axis=0)
         logits = self(features, training=False)
         probs = tf.nn.softmax(logits)
 
@@ -192,6 +192,7 @@ def __init__(self,
                  prediction_layer_norm: bool = True,
                  prediction_projection_units: int = 640,
                  joint_dim: int = 640,
+                 joint_activation: str = "tanh",
                  kernel_regularizer = None,
                  bias_regularizer = None,
                  name = "StreamingTransducer",
@@ -217,6 +218,7 @@ def __init__(self,
             layer_norm=prediction_layer_norm,
             projection_units=prediction_projection_units,
             joint_dim=joint_dim,
+            joint_activation=joint_activation,
             kernel_regularizer=kernel_regularizer,
             bias_regularizer=bias_regularizer,
             name=name, **kwargs