Skip to content

Commit b771734

Browse files
authored
Merge pull request #87 from TensorSpeech/dev/testing
Add unittest and Transducer Joint activation
2 parents 3004f0e + 4a97b87 commit b771734

29 files changed

+746 additions, -599 deletions (lines changed)

examples/conformer/config.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,7 @@ model_config:
5656
prediction_layer_norm: True
5757
prediction_projection_units: 0
5858
joint_dim: 320
59+
joint_activation: tanh
5960

6061
learning_config:
6162
augmentations:

examples/contextnet/config.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -192,6 +192,7 @@ model_config:
192192
prediction_layer_norm: True
193193
prediction_projection_units: 0
194194
joint_dim: 640
195+
joint_activation: tanh
195196

196197
learning_config:
197198
augmentations:

examples/deepspeech2/README.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,4 +29,5 @@ model_config:
2929
3030
See `python examples/deepspeech2/train_*.py --help`
3131

32-
See `python examples/deepspeech2/test_*.py --help`
32+
See `python examples/deepspeech2/test_*.py --help`
33+

examples/jasper/README.md

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,4 +37,5 @@ model_config:
3737
3838
See `python examples/jasper/train_*.py --help`
3939

40-
See `python examples/jasper/test_*.py --help`
40+
See `python examples/jasper/test_*.py --help`
41+

examples/streaming_transducer/config.yml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,7 @@ model_config:
4949
prediction_projection_units: 320
5050
prediction_layer_norm: True
5151
joint_dim: 320
52+
joint_activation: tanh
5253

5354
learning_config:
5455
augmentations:

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333

3434
setuptools.setup(
3535
name="TensorFlowASR",
36-
version="0.6.0",
36+
version="0.6.1",
3737
author="Huy Le Nguyen",
3838
author_email="[email protected]",
3939
description="Almost State-of-the-art Automatic Speech Recognition using Tensorflow 2",

tensorflow_asr/models/conformer.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -384,6 +384,7 @@ def __init__(self,
384384
prediction_layer_norm: bool = True,
385385
prediction_projection_units: int = 0,
386386
joint_dim: int = 1024,
387+
joint_activation: str = "tanh",
387388
kernel_regularizer=L2,
388389
bias_regularizer=L2,
389390
name: str = "conformer_transducer",
@@ -414,6 +415,7 @@ def __init__(self,
414415
layer_norm=prediction_layer_norm,
415416
projection_units=prediction_projection_units,
416417
joint_dim=joint_dim,
418+
joint_activation=joint_activation,
417419
kernel_regularizer=kernel_regularizer,
418420
bias_regularizer=bias_regularizer,
419421
name=name, **kwargs

tensorflow_asr/models/contextnet.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,7 @@ class ContextNet(Transducer):
196196
def __init__(self,
197197
vocabulary_size: int,
198198
encoder_blocks: List[dict],
199-
encoder_alpha: float,
199+
encoder_alpha: float = 0.5,
200200
prediction_embed_dim: int = 512,
201201
prediction_embed_dropout: int = 0,
202202
prediction_num_rnns: int = 1,
@@ -206,6 +206,7 @@ def __init__(self,
206206
prediction_layer_norm: bool = True,
207207
prediction_projection_units: int = 0,
208208
joint_dim: int = 1024,
209+
joint_activation: str = "tanh",
209210
kernel_regularizer=L2,
210211
bias_regularizer=L2,
211212
name: str = "contextnet",
@@ -228,6 +229,7 @@ def __init__(self,
228229
layer_norm=prediction_layer_norm,
229230
projection_units=prediction_projection_units,
230231
joint_dim=joint_dim,
232+
joint_activation=joint_activation,
231233
kernel_regularizer=kernel_regularizer,
232234
bias_regularizer=bias_regularizer,
233235
name=name, **kwargs

tensorflow_asr/models/ctc.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
class CtcModel(Model):
2626
def __init__(self, **kwargs):
2727
super(CtcModel, self).__init__(**kwargs)
28+
self.time_reduction_factor = 1
2829

2930
def _build(self, input_shape):
3031
features = tf.keras.Input(input_shape, dtype=tf.float32)
@@ -67,7 +68,7 @@ def recognize_tflite(self, signal):
6768
features = self.speech_featurizer.tf_extract(signal)
6869
features = tf.expand_dims(features, axis=0)
6970
input_length = shape_list(features)[1]
70-
input_length = get_reduced_length(input_length, self.base_model.time_reduction_factor)
71+
input_length = get_reduced_length(input_length, self.time_reduction_factor)
7172
input_length = tf.expand_dims(input_length, axis=0)
7273
logits = self(features, training=False)
7374
probs = tf.nn.softmax(logits)
@@ -113,7 +114,7 @@ def recognize_beam_tflite(self, signal):
113114
features = self.speech_featurizer.tf_extract(signal)
114115
features = tf.expand_dims(features, axis=0)
115116
input_length = shape_list(features)[1]
116-
input_length = get_reduced_length(input_length, self.base_model.time_reduction_factor)
117+
input_length = get_reduced_length(input_length, self.time_reduction_factor)
117118
input_length = tf.expand_dims(input_length, axis=0)
118119
logits = self(features, training=False)
119120
probs = tf.nn.softmax(logits)

tensorflow_asr/models/streaming_transducer.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -192,6 +192,7 @@ def __init__(self,
192192
prediction_layer_norm: bool = True,
193193
prediction_projection_units: int = 640,
194194
joint_dim: int = 640,
195+
joint_activation: str = "tanh",
195196
kernel_regularizer = None,
196197
bias_regularizer = None,
197198
name = "StreamingTransducer",
@@ -217,6 +218,7 @@ def __init__(self,
217218
layer_norm=prediction_layer_norm,
218219
projection_units=prediction_projection_units,
219220
joint_dim=joint_dim,
221+
joint_activation=joint_activation,
220222
kernel_regularizer=kernel_regularizer,
221223
bias_regularizer=bias_regularizer,
222224
name=name, **kwargs

0 commit comments

Comments (0)