
Commit 29cad5a

Finished 4th tutorial code, fixed CER and WER metrics, wrote some tests for them
1 parent 57bf2da commit 29cad5a

File tree

9 files changed: +67 -76 lines changed


CHANGELOG.md

Lines changed: 3 additions & 2 deletions
@@ -1,14 +1,15 @@
 ## [0.1.5] - 2022-01-03
 
 ### Changed
-- changed CWERMetric in mltu.metrics; the character/word error rate was calculated in a wrong way
+- separated CWERMetric into CER and WER metrics in mltu.metrics; the character/word error rate was calculated in a wrong way
 - created @setter for augmentors and transformers in DataProvider, to properly add augmentors and transformers to the pipeline
 - augmentors and transformers must inherit from `mltu.augmentors.base.Augmentor` and `mltu.transformers.base.Transformer` respectively
-- added better explained documentation
 
 ### Added:
 - added RandomSharpen to mltu.augmentors, used for simple image augmentation;
 - added ImageShowCV2 to mltu.transformers, used to show image with cv2 for debugging purposes;
+- added better explained documentation
+- created unit tests for CER and WER in mltu.utils.text_utils and for the TensorFlow versions of CER and WER in mltu.metrics
 
 ## [0.1.4] - 2022-12-21
 
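For context on the metric fix this changelog describes: both rates are normalized edit distances, CER over characters and WER over whitespace-separated words, each divided by the length of the reference. A minimal sketch of the two formulas, using a generic Levenshtein helper rather than mltu's own edit_distance:

def levenshtein(a, b):
    # Classic dynamic-programming edit distance between two sequences.
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        curr = [i]
        for j, cb in enumerate(b, 1):
            curr.append(min(prev[j] + 1, curr[j - 1] + 1, prev[j - 1] + (ca != cb)))
        prev = curr
    return prev[-1]

# CER: character-level edits divided by the number of reference characters
cer = levenshtein("helo world", "hello world") / len("hello world")   # 1/11
# WER: word-level edits divided by the number of reference words
wer = levenshtein("helo world".split(), "hello world".split()) / 2    # 1/2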

Tests/test_metrics.py

Lines changed: 35 additions & 47 deletions
@@ -1,65 +1,53 @@
+import unittest
 import numpy as np
 from mltu.metrics import CERMetric, WERMetric
 
-from mltu.utils.text_utils import get_wer as wer
-
-import cv2
-import typing
 import numpy as np
 import tensorflow as tf
 
-if __name__ == "__main__":
-    import pandas as pd
-    from tqdm import tqdm
-)
-
-
+class TestMetrics(unittest.TestCase):
 
-# sentences_true = ['helo love', 'helo home', 'helo world']
-# sentences_pred = ['helo python', 'helo home', 'helo python here']
+    def to_embeddings(self, sentences, vocab):
+        embeddings, max_len = [], 0
 
-# def to_embeddings(sentences, vocab):
-#     embeddings, max_len = [], 0
+        for sentence in sentences:
+            embedding = []
+            for character in sentence:
+                embedding.append(vocab.index(character))
+            embeddings.append(embedding)
+            max_len = max(max_len, len(embedding))
+        return embeddings, max_len
 
-#     for sentence in sentences:
-#         embedding = []
-#         for character in sentence:
-#             embedding.append(vocab.index(character))
-#         embeddings.append(embedding)
-#         max_len = max(max_len, len(embedding))
-#     return embeddings, max_len
+    def setUp(self) -> None:
+        true_words = ['Who are you', 'I am a student', 'I am a teacher', 'Just different sentence length']
+        pred_words = ['Who are you', 'I am a ztudent', 'I am A reacher', 'Just different length']
 
-# vocab = set()
-# for sen in sentences_true + sentences_pred:
-#     for character in sen:
-#         vocab.add(character)
-# vocab = "".join(vocab)
+        vocab = set()
+        for sen in true_words + pred_words:
+            for character in sen:
+                vocab.add(character)
+        self.vocab = "".join(vocab)
 
-# sen1, max_len = to_embeddings(sentences_true, vocab)
-# sen2, _ = to_embeddings(sentences_pred, vocab)
+        sentence_true, max_len_true = self.to_embeddings(true_words, self.vocab)
+        sentence_pred, max_len_pred = self.to_embeddings(pred_words, self.vocab)
 
-# sen_true = [np.pad(sen, (0, max_len - len(sen)), 'constant', constant_values=len(vocab)) for sen in sen1]
-# sen_pred = [np.pad(sen, (0, 24 - len(sen)), 'constant', constant_values=-1) for sen in sen2]
+        max_len = max(max_len_true, max_len_pred)
+        padding_length = 64
 
+        self.sen_true = [np.pad(sen, (0, max_len - len(sen)), 'constant', constant_values=len(self.vocab)) for sen in sentence_true]
+        self.sen_pred = [np.pad(sen, (0, padding_length - len(sen)), 'constant', constant_values=-1) for sen in sentence_pred]
 
-# tf_vocab = tf.constant(list(vocab))
+    def test_CERMetric(self):
+        vocabulary = tf.constant(list(self.vocab))
+        cer = CERMetric.get_cer(self.sen_true, self.sen_pred, vocabulary).numpy()
 
-# distance = WERMetric.get_wer(sen_pred, sen_true, vocab=tf_vocab)
+        self.assertTrue(np.array_equal(cer, np.array([0.0, 0.071428575, 0.14285715, 0.42857143], dtype=np.float32)))
 
-# d = wer(sentences_pred, sentences_true)
+    def test_WERMetric(self):
+        vocabulary = tf.constant(list(self.vocab))
+        wer = WERMetric.get_wer(self.sen_true, self.sen_pred, vocabulary).numpy()
 
-# print(list(distance.numpy()))
-# print(d)
+        self.assertTrue(np.array_equal(wer, np.array([0., 0.25, 0.5, 0.33333334], dtype=np.float32)))
 
-
-word_true = [
-    [1, 2, 3, 4, 5, 6, 1],
-    [2, 3, 4, 5, 6, 1, 1]
-]
-word_pred = [
-    [1, 2, 3, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
-    [2, 3, 4, 5, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
-]
-vocabulary = tf.constant(list("abcdefg"))
-
-distance = CERMetric.get_cer(word_pred, word_true, vocabulary)
+if __name__ == "__main__":
+    unittest.main()
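The expected arrays follow from normalized edit distances between each sentence pair: 'I am a ztudent' vs 'I am a student' has one wrong character out of fourteen (1/14 ≈ 0.0714) and one wrong word out of four (0.25); 'I am A reacher' vs 'I am a teacher' has two of each (2/14 ≈ 0.1429 and 2/4 = 0.5). A quick cross-check against the plain-Python helpers, assuming get_cer shares get_wer's (preds, target) string signature:

from mltu.utils.text_utils import get_cer, get_wer

# one substitution ('z' for 's') over fourteen reference characters
assert abs(get_cer('I am a ztudent', 'I am a student') - 1 / 14) < 1e-6
# one wrong word out of four
assert get_wer('I am a ztudent', 'I am a student') == 0.25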

Tests/test_text_utils.py

Lines changed: 5 additions & 5 deletions
@@ -69,27 +69,27 @@ def test_get_wer(self):
         # Test simple case with no errors
         preds = 'A B C'
         target = 'A B C'
-        self.assertEqual(get_wer(preds, target), [0, 0, 0])
+        self.assertEqual(get_wer(preds, target), 0)
 
         # Test simple case with one word error
         preds = 'A B C'
         target = 'A B D'
-        self.assertEqual(get_wer(preds, target), [0, 0, 1])
+        self.assertEqual(get_wer(preds, target), 1/3)
 
         # Test simple case with multiple word errors
         preds = 'A B C'
         target = 'D E F'
-        self.assertEqual(get_wer(preds, target), [1, 1, 1])
+        self.assertEqual(get_wer(preds, target), 1)
 
         # Test empty input
         preds = ""
         target = ""
-        self.assertEqual(get_wer(preds, target), [])
+        self.assertEqual(get_wer(preds, target), 0)
 
         # Test simple case with different sentence lengths
         preds = ['ABC']
         target = ['ABC DEF']
-        self.assertEqual(get_wer(preds, target), [1/2])
+        self.assertEqual(get_wer(preds, target), 1)
 
 if __name__ == '__main__':
     unittest.main()
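These updated expectations encode get_wer's new contract: a single float per call rather than a per-word list. 'A B C' against 'A B D' has one wrong word out of three target words, hence 1/3; empty inputs return 0 by convention; and the pre-tokenized pair ['ABC'] vs ['ABC DEF'] counts each list element as one whole token, so a single substitution over one target token gives 1.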

Tutorials/04_sentence_recognition/configs.py

Lines changed: 1 addition & 1 deletion
@@ -12,6 +12,6 @@ def __init__(self):
         self.width = 1408
         self.max_text_length = 0
         self.batch_size = 32
-        self.learning_rate = 0.003
+        self.learning_rate = 0.001
         self.train_epochs = 1000
         self.train_workers = 20

Tutorials/04_sentence_recognition/inferenceModel.py

Lines changed: 2 additions & 2 deletions
@@ -26,11 +26,11 @@ def predict(self, image: np.ndarray):
 from tqdm import tqdm
 from mltu.configs import BaseModelConfigs
 
-configs = BaseModelConfigs.load("Models/04_sentence_recognition/202301041513/configs.yaml")
+configs = BaseModelConfigs.load("Models/04_sentence_recognition/202301060816/configs.yaml")
 
 model = ImageToWordModel(model_path=configs.model_path, char_list=configs.vocab)
 
-df = pd.read_csv("Models/04_sentence_recognition/202301041513/val.csv").values.tolist()
+df = pd.read_csv("Models/04_sentence_recognition/202301060816/val.csv").values.tolist()
 
 accum_cer, accum_wer = [], []
 for image_path, label in tqdm(df):

Tutorials/04_sentence_recognition/model.py

Lines changed: 4 additions & 1 deletion
@@ -26,7 +26,10 @@ def train_model(input_dim, output_dim, activation='leaky_relu', dropout=0.2):
 
     squeezed = layers.Reshape((x9.shape[-3] * x9.shape[-2], x9.shape[-1]))(x9)
 
-    blstm = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(squeezed)
+    blstm = layers.Bidirectional(layers.LSTM(256, return_sequences=True))(squeezed)
+    blstm = layers.Dropout(dropout)(blstm)
+
+    blstm = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(blstm)
     blstm = layers.Dropout(dropout)(blstm)
 
     output = layers.Dense(output_dim + 1, activation='softmax', name="output")(blstm)
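Viewed on its own, the new recurrent head replaces the single bidirectional LSTM with a wide-then-narrow stack, each layer followed by dropout. A minimal standalone sketch; the input shape and output size below are placeholders, not the tutorial's actual dimensions:

import tensorflow as tf
from tensorflow.keras import layers

dropout = 0.2
inputs = tf.keras.Input(shape=(32, 256))  # (time steps, features), placeholder shape

blstm = layers.Bidirectional(layers.LSTM(256, return_sequences=True))(inputs)
blstm = layers.Dropout(dropout)(blstm)

blstm = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(blstm)
blstm = layers.Dropout(dropout)(blstm)

# output_dim + 1 units: one extra class for the CTC blank token
output = layers.Dense(80 + 1, activation='softmax', name="output")(blstm)
model = tf.keras.Model(inputs=inputs, outputs=output)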

Tutorials/04_sentence_recognition/train.py

Lines changed: 5 additions & 2 deletions
@@ -10,7 +10,7 @@
 from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen
 from mltu.losses import CTCloss
 from mltu.callbacks import Model2onnx, TrainLogger
-from mltu.metrics import CWERMetric
+from mltu.metrics import CERMetric, WERMetric
 
 from model import train_model
 from configs import ModelConfigs
@@ -89,7 +89,10 @@
 model.compile(
     optimizer=tf.keras.optimizers.Adam(learning_rate=configs.learning_rate),
     loss=CTCloss(),
-    metrics=[CWERMetric(padding_token=len(configs.vocab))],
+    metrics=[
+        CERMetric(vocabulary=configs.vocab),
+        WERMetric(vocabulary=configs.vocab)
+    ],
     run_eagerly=False
 )
 model.summary(line_length=110)

mltu/metrics.py

Lines changed: 6 additions & 4 deletions
@@ -78,7 +78,7 @@ class CERMetric(tf.keras.metrics.Metric):
         name: (Optional) string name of the metric instance.
         **kwargs: Additional keyword arguments.
     """
-    def __init__(self, vocabulary, name='CWER', **kwargs):
+    def __init__(self, vocabulary, name='CER', **kwargs):
         # Initialize the base Metric class
         super(CERMetric, self).__init__(name=name, **kwargs)
 
@@ -103,11 +103,12 @@ def get_cer(pred_decoded, y_true, vocab, padding=-1):
             tf.Tensor: The CER between the predicted labels and true labels
         """
         # Keep only valid indices in the predicted labels tensor, replacing invalid indices with padding token
-        valid_pred_indices = tf.less(pred_decoded, tf.shape(vocab)[0])
+        vocab_length = tf.cast(tf.shape(vocab)[0], tf.int64)
+        valid_pred_indices = tf.less(pred_decoded, vocab_length)
         valid_pred = tf.where(valid_pred_indices, pred_decoded, padding)
 
         # Keep only valid indices in the true labels tensor, replacing invalid indices with padding token
-        valid_true_indices = tf.less(y_true, tf.shape(vocab)[0])
+        valid_true_indices = tf.less(y_true, vocab_length)
         valid_true = tf.where(valid_true_indices, y_true, padding)
 
         # Convert the valid predicted labels tensor to a sparse tensor
@@ -186,7 +187,8 @@ def preprocess_dense(dense_input: tf.Tensor, vocab: tf.Tensor, padding=-1) -> tf.SparseTensor:
             tf.SparseTensor: The sparse tensor with given vocabulary
         """
         # Keep only the valid indices of the dense input tensor
-        valid_indices = tf.less(dense_input, tf.shape(vocab)[0])
+        vocab_length = tf.cast(tf.shape(vocab)[0], tf.int64)
+        valid_indices = tf.less(dense_input, vocab_length)
         valid_input = tf.where(valid_indices, dense_input, padding)
 
         # Convert the valid input tensor to a ragged tensor with padding
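The new tf.cast is needed because tf.shape always returns int32, while the dense tensors coming out of CTC decoding are int64, and tf.less rejects mismatched dtypes. A small reproduction of the fixed pattern, with illustrative values rather than the library's real inputs:

import tensorflow as tf

pred_decoded = tf.constant([[1, 2, 99]], dtype=tf.int64)  # CTC decoding yields int64 indices
vocab = tf.constant(list("abcdefg"))

# tf.less(pred_decoded, tf.shape(vocab)[0]) would fail: int64 compared against int32
vocab_length = tf.cast(tf.shape(vocab)[0], tf.int64)
valid = tf.where(tf.less(pred_decoded, vocab_length), pred_decoded, -1)
print(valid.numpy())  # [[ 1  2 -1]]: index 99 is out of vocabulary, replaced by the padding value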

mltu/utils/text_utils.py

Lines changed: 6 additions & 12 deletions
@@ -82,6 +82,7 @@ def get_cer(
         return 0.0
 
     cer = errors / total
+
    return cer
 
 def get_wer(
@@ -102,20 +103,13 @@ def get_wer(
     if isinstance(target, str):
         target = target.split()
 
-    assert len(preds) == len(target), 'preds and target must have the same length'
-
-    wer = []
-    for pred, tgt in zip(preds, target):
-        errors = edit_distance(pred.split(), tgt.split())
-        total_words = len(tgt.split())
+    errors = edit_distance(preds, target)
+    total_words = len(target)
 
-        if total_words == 0:
-            wer.append(0)
-            continue
-
-        wer.append(errors / total_words)
+    if total_words == 0:
+        return 0.0
 
-    return wer
+    return errors / total_words
 
 if __name__ == '__main__':
     c1 = 'ROKAS'
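After this rewrite, get_wer accepts either raw strings, which it splits on whitespace itself, or pre-tokenized lists whose elements are compared as whole tokens. A brief usage sketch of the new behaviour, with the values taken from the updated tests:

from mltu.utils.text_utils import get_wer

assert abs(get_wer('A B C', 'A B D') - 1 / 3) < 1e-9  # one of three words wrong
assert get_wer('', '') == 0.0                         # empty inputs return 0.0
assert get_wer(['ABC'], ['ABC DEF']) == 1.0           # list elements are whole tokens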
