🚀 Update transducer beam search and tester

nglehuy · nglehuy · commit 83d1b7058da8 · 2020-12-15T21:29:35.000+07:00
diff --git a/setup.py b/setup.py
@@ -19,6 +19,7 @@
 
 requirements = [
     "tensorflow-datasets>=3.2.1,<4.0.0",
+    "tensorflow-metadata>=0.26.0",
     "tensorflow-addons>=0.10.0",
     "setuptools>=47.1.1",
     "librosa>=0.7.2",
@@ -32,12 +33,11 @@
     "tqdm>=4.51.0",
     "colorama>=0.4.3",
     "nlpaug>=1.0.1",
-    "absl-py>=0.9,<0.11"
 ]
 
 setuptools.setup(
     name="TensorFlowASR",
-    version="0.4.3",
+    version="0.4.4",
     author="Huy Le Nguyen",
     author_email="nlhuy.cs.16@gmail.com",
     description="Almost State-of-the-art Automatic Speech Recognition using Tensorflow 2",
diff --git a/tensorflow_asr/models/transducer.py b/tensorflow_asr/models/transducer.py
@@ -462,7 +462,7 @@ def execute(signal: tf.Tensor):
         return tf.map_fn(execute, signals, fn_output_signature=tf.TensorSpec([], dtype=tf.string))
 
     def perform_beam_search(self, encoded, lm=False):
-        with tf.name_scope(f"{self.name}_beam_search"):
+        with tf.device("/CPU:0"), tf.name_scope(f"{self.name}_beam_search"):
             beam_width = tf.cond(
                 tf.less(self.text_featurizer.decoder_config.beam_width, self.text_featurizer.num_classes),
                 true_fn=lambda: self.text_featurizer.decoder_config.beam_width,
@@ -520,9 +520,9 @@ def beam_condition(beam, beam_width, A, A_i, B): return tf.less(beam, beam_width
                 def beam_body(beam, beam_width, A, A_i, B):
                     y_hat_score, y_hat_score_index = tf.math.top_k(A.score.stack(), k=1)
                     y_hat_score = y_hat_score[0]
-                    y_hat_index = tf.gather_nd(A.indices.stack(), tf.expand_dims(y_hat_score_index[0], axis=-1))
-                    y_hat_prediction = tf.gather_nd(A.prediction.stack(), tf.expand_dims(y_hat_score_index[0], axis=-1))
-                    y_hat_states = tf.gather_nd(A.states.stack(), tf.expand_dims(y_hat_score_index[0], axis=-1))
+                    y_hat_index = tf.gather_nd(A.indices.stack(), y_hat_score_index)
+                    y_hat_prediction = tf.gather_nd(A.prediction.stack(), y_hat_score_index)
+                    y_hat_states = tf.gather_nd(A.states.stack(), y_hat_score_index)
 
                     ytu, new_states = self.decoder_inference(encoded=encoded_t, predicted=y_hat_index, states=y_hat_states)
 
@@ -571,11 +571,16 @@ def predict_body(pred, A, A_i, B):
 
             _, _, B = tf.while_loop(condition, body, loop_vars=(0, total, B))
 
-            y_hat_score, y_hat_score_index = tf.math.top_k(B.score.stack(), k=1)
+            scores = B.score.stack()
+            if self.text_featurizer.decoder_config.norm_score:
+                prediction_lengths = tf.strings.length(B.prediction.stack(), unit="UTF8_CHAR")
+                scores /= tf.cast(prediction_lengths, dtype=scores.dtype)
+
+            y_hat_score, y_hat_score_index = tf.math.top_k(scores, k=1)
             y_hat_score = y_hat_score[0]
-            y_hat_index = tf.gather_nd(B.indices.stack(), tf.expand_dims(y_hat_score_index[0], axis=-1))
-            y_hat_prediction = tf.gather_nd(B.prediction.stack(), tf.expand_dims(y_hat_score_index[0], axis=-1))
-            y_hat_states = tf.gather_nd(B.states.stack(), tf.expand_dims(y_hat_score_index[0], axis=-1))
+            y_hat_index = tf.gather_nd(B.indices.stack(), y_hat_score_index)
+            y_hat_prediction = tf.gather_nd(B.prediction.stack(), y_hat_score_index)
+            y_hat_states = tf.gather_nd(B.states.stack(), y_hat_score_index)
 
             return Hypothesis(
                 index=y_hat_index,
diff --git a/tensorflow_asr/runners/base_runners.py b/tensorflow_asr/runners/base_runners.py
@@ -444,11 +444,11 @@ def _test_step(self, batch):
 
         labels = self.model.text_featurizer.iextract(labels)
         greed_pred = self.model.recognize(signals)
+        beam_pred = beam_lm_pred = tf.constant([""], dtype=tf.string)
         if self.model.text_featurizer.decoder_config.beam_width > 0:
             beam_pred = self.model.recognize_beam(signals, lm=False)
+        if self.model.text_featurizer.decoder_config.lm_config:
             beam_lm_pred = self.model.recognize_beam(signals, lm=True)
-        else:
-            beam_pred = beam_lm_pred = tf.constant([""], dtype=tf.string)
 
         return file_paths, labels, greed_pred, beam_pred, beam_lm_pred