Commit 57bf2da

working on CER and WER TensorFlow metrics as well, writing unittests for them
1 parent 940912e commit 57bf2da

9 files changed (+626, -6 lines)

.vscode/settings.json

Lines changed: 10 additions & 1 deletion
@@ -1,3 +1,12 @@
 {
-    "python.analysis.typeCheckingMode": "off"
+    "python.analysis.typeCheckingMode": "off",
+    "python.testing.unittestArgs": [
+        "-v",
+        "-s",
+        "./Tests",
+        "-p",
+        "*test*.py"
+    ],
+    "python.testing.pytestEnabled": false,
+    "python.testing.unittestEnabled": true
 }
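
These settings point VS Code's Test Explorer at the new Tests folder. For reference, they are roughly equivalent to running unittest discovery from the repository root:

    python -m unittest discover -v -s ./Tests -p "*test*.py"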

Tests/test_metrics.py

Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
import numpy as np
from mltu.metrics import CERMetric, WERMetric

from mltu.utils.text_utils import get_wer as wer

import cv2
import typing
import tensorflow as tf

if __name__ == "__main__":
    import pandas as pd
    from tqdm import tqdm

    # sentences_true = ['helo love', 'helo home', 'helo world']
    # sentences_pred = ['helo python', 'helo home', 'helo python here']

    # def to_embeddings(sentences, vocab):
    #     embeddings, max_len = [], 0
    #     for sentence in sentences:
    #         embedding = []
    #         for character in sentence:
    #             embedding.append(vocab.index(character))
    #         embeddings.append(embedding)
    #         max_len = max(max_len, len(embedding))
    #     return embeddings, max_len

    # vocab = set()
    # for sen in sentences_true + sentences_pred:
    #     for character in sen:
    #         vocab.add(character)
    # vocab = "".join(vocab)

    # sen1, max_len = to_embeddings(sentences_true, vocab)
    # sen2, _ = to_embeddings(sentences_pred, vocab)

    # sen_true = [np.pad(sen, (0, max_len - len(sen)), 'constant', constant_values=len(vocab)) for sen in sen1]
    # sen_pred = [np.pad(sen, (0, 24 - len(sen)), 'constant', constant_values=-1) for sen in sen2]

    # tf_vocab = tf.constant(list(vocab))

    # distance = WERMetric.get_wer(sen_pred, sen_true, vocab=tf_vocab)

    # d = wer(sentences_pred, sentences_true)

    # print(list(distance.numpy()))
    # print(d)

    word_true = [
        [1, 2, 3, 4, 5, 6, 1],
        [2, 3, 4, 5, 6, 1, 1]
    ]
    word_pred = [
        [1, 2, 3, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
        [2, 3, 4, 5, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
    ]
    vocabulary = tf.constant(list("abcdefg"))

    distance = CERMetric.get_cer(word_pred, word_true, vocabulary)
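
The get_cer call above should return one normalized edit distance per sample. As a point of reference, here is a minimal sketch of that computation using tf.edit_distance; it assumes -1 padding as in word_pred above, ignores the vocabulary argument, and is not the actual CERMetric implementation:

import tensorflow as tf

def cer_sketch(word_pred, word_true, padding_token=-1):
    # Convert padded dense label arrays into the SparseTensor form that
    # tf.edit_distance expects, dropping the padding token so it does
    # not count as a character.
    pred = tf.cast(tf.constant(word_pred), tf.int64)
    true = tf.cast(tf.constant(word_true), tf.int64)
    sparse_pred = tf.RaggedTensor.from_tensor(pred, padding=padding_token).to_sparse()
    sparse_true = tf.RaggedTensor.from_tensor(true, padding=padding_token).to_sparse()
    # normalize=True divides each edit distance by the length of the true
    # sequence, which is exactly the per-sample character error rate.
    return tf.edit_distance(sparse_pred, sparse_true, normalize=True)

For the arrays above this yields [2/7, 1/7]: the first prediction is missing two characters of its seven-character reference, the second is missing one.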

Tests/test_text_utils.py

Lines changed: 95 additions & 0 deletions
@@ -0,0 +1,95 @@
import unittest

from mltu.utils.text_utils import edit_distance, get_cer, get_wer

class TestTextUtils(unittest.TestCase):

    def test_edit_distance(self):
        """ This unit test includes several test cases to cover different scenarios, including no errors,
        substitution errors, insertion errors, deletion errors, and a more complex case with multiple
        errors. It also includes a test case for empty input.
        """
        # Test simple case with no errors
        prediction_tokens = ['A', 'B', 'C']
        reference_tokens = ['A', 'B', 'C']
        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 0)

        # Test simple case with one substitution error
        prediction_tokens = ['A', 'B', 'D']
        reference_tokens = ['A', 'B', 'C']
        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 1)

        # Test simple case with one insertion error
        prediction_tokens = ['A', 'B', 'C']
        reference_tokens = ['A', 'B', 'C', 'D']
        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 1)

        # Test simple case with one deletion error
        prediction_tokens = ['A', 'B']
        reference_tokens = ['A', 'B', 'C']
        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 1)

        # Test more complex case with multiple errors
        prediction_tokens = ['A', 'B', 'C', 'D', 'E']
        reference_tokens = ['A', 'C', 'B', 'F', 'E']
        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 3)

        # Test empty input
        prediction_tokens = []
        reference_tokens = []
        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 0)

    def test_get_cer(self):
        # Test simple case with no errors
        preds = ['A B C']
        target = ['A B C']
        self.assertEqual(get_cer(preds, target), 0)

        # Test simple case with one character error
        preds = ['A B C']
        target = ['A B D']
        self.assertEqual(get_cer(preds, target), 1/5)

        # Test simple case with multiple character errors
        preds = ['A B C']
        target = ['D E F']
        self.assertEqual(get_cer(preds, target), 3/5)

        # Test empty input
        preds = []
        target = []
        self.assertEqual(get_cer(preds, target), 0)

        # Test simple case with different word lengths
        preds = ['ABC']
        target = ['ABCDEFG']
        self.assertEqual(get_cer(preds, target), 4/7)

    def test_get_wer(self):
        # Test simple case with no errors
        preds = 'A B C'
        target = 'A B C'
        self.assertEqual(get_wer(preds, target), [0, 0, 0])

        # Test simple case with one word error
        preds = 'A B C'
        target = 'A B D'
        self.assertEqual(get_wer(preds, target), [0, 0, 1])

        # Test simple case with multiple word errors
        preds = 'A B C'
        target = 'D E F'
        self.assertEqual(get_wer(preds, target), [1, 1, 1])

        # Test empty input
        preds = ""
        target = ""
        self.assertEqual(get_wer(preds, target), [])

        # Test simple case with different sentence lengths
        preds = ['ABC']
        target = ['ABC DEF']
        self.assertEqual(get_wer(preds, target), [1/2])

if __name__ == '__main__':
    unittest.main()
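
For reference, the expected values in test_edit_distance match the classic Levenshtein distance, which a dynamic-programming table computes as follows. This sketch is consistent with the assertions above but is not necessarily mltu's exact implementation:

def edit_distance_sketch(prediction_tokens, reference_tokens):
    rows, cols = len(prediction_tokens) + 1, len(reference_tokens) + 1
    # dp[i][j] = edits needed to turn the first i predicted tokens
    # into the first j reference tokens.
    dp = [[0] * cols for _ in range(rows)]
    for i in range(rows):
        dp[i][0] = i
    for j in range(cols):
        dp[0][j] = j
    for i in range(1, rows):
        for j in range(1, cols):
            cost = 0 if prediction_tokens[i - 1] == reference_tokens[j - 1] else 1
            dp[i][j] = min(dp[i - 1][j] + 1,        # deletion
                           dp[i][j - 1] + 1,        # insertion
                           dp[i - 1][j - 1] + cost) # substitution / match
    return dp[-1][-1]

assert edit_distance_sketch(['A', 'B', 'C', 'D', 'E'], ['A', 'C', 'B', 'F', 'E']) == 3

get_cer then normalizes this distance by the reference length, e.g. an edit distance of 4 over the 7 reference characters of 'ABCDEFG' gives the 4/7 asserted above.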
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
import stow
from datetime import datetime

from mltu.configs import BaseModelConfigs

class ModelConfigs(BaseModelConfigs):
    def __init__(self):
        super().__init__()
        self.model_path = stow.join('Models/04_sentence_recognition', datetime.strftime(datetime.now(), "%Y%m%d%H%M"))
        self.vocab = ''
        self.height = 96
        self.width = 1408
        self.max_text_length = 0
        self.batch_size = 32
        self.learning_rate = 0.003
        self.train_epochs = 1000
        self.train_workers = 20
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
import cv2
import typing
import numpy as np

from mltu.inferenceModel import OnnxInferenceModel
from mltu.utils.text_utils import ctc_decoder, get_cer, get_wer

class ImageToWordModel(OnnxInferenceModel):
    def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.char_list = char_list

    def predict(self, image: np.ndarray):
        image = cv2.resize(image, self.input_shape[:2][::-1])

        image_pred = np.expand_dims(image, axis=0).astype(np.float32)

        preds = self.model.run(None, {self.input_name: image_pred})[0]

        text = ctc_decoder(preds, self.char_list)[0]

        return text

if __name__ == "__main__":
    import pandas as pd
    from tqdm import tqdm
    from mltu.configs import BaseModelConfigs

    configs = BaseModelConfigs.load("Models/04_sentence_recognition/202301041513/configs.yaml")

    model = ImageToWordModel(model_path=configs.model_path, char_list=configs.vocab)

    df = pd.read_csv("Models/04_sentence_recognition/202301041513/val.csv").values.tolist()

    accum_cer, accum_wer = [], []
    for image_path, label in tqdm(df):
        image = cv2.imread(image_path)

        prediction_text = model.predict(image)

        cer = get_cer(prediction_text, label)
        wer = get_wer(prediction_text, label)
        print(f"Image: {image_path}; Label: ({label}); Prediction: ({prediction_text}); CER: {cer}; WER: {wer}")

        accum_cer.append(cer)
        accum_wer.append(wer)

    print(f"Average CER: {np.average(accum_cer)}, Average WER: {np.average(accum_wer)}")
Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
from keras import layers
from keras.models import Model

from mltu.model_utils import residual_block

def train_model(input_dim, output_dim, activation='leaky_relu', dropout=0.2):

    inputs = layers.Input(shape=input_dim, name="input")

    # normalize images here instead of in the preprocessing step
    input = layers.Lambda(lambda x: x / 255)(inputs)

    x1 = residual_block(input, 32, activation=activation, skip_conv=True, strides=1, dropout=dropout)

    x2 = residual_block(x1, 32, activation=activation, skip_conv=True, strides=2, dropout=dropout)
    x3 = residual_block(x2, 32, activation=activation, skip_conv=False, strides=1, dropout=dropout)

    x4 = residual_block(x3, 64, activation=activation, skip_conv=True, strides=2, dropout=dropout)
    x5 = residual_block(x4, 64, activation=activation, skip_conv=False, strides=1, dropout=dropout)

    x6 = residual_block(x5, 128, activation=activation, skip_conv=True, strides=2, dropout=dropout)
    x7 = residual_block(x6, 128, activation=activation, skip_conv=True, strides=1, dropout=dropout)

    x8 = residual_block(x7, 128, activation=activation, skip_conv=True, strides=2, dropout=dropout)
    x9 = residual_block(x8, 128, activation=activation, skip_conv=False, strides=1, dropout=dropout)

    squeezed = layers.Reshape((x9.shape[-3] * x9.shape[-2], x9.shape[-1]))(x9)

    blstm = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(squeezed)
    blstm = layers.Dropout(dropout)(blstm)

    output = layers.Dense(output_dim + 1, activation='softmax', name="output")(blstm)

    model = Model(inputs=inputs, outputs=output)
    return model
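
Since the commit message says the new CER and WER metrics are TensorFlow metrics, they would presumably be attached to this model at compile time. The snippet below is only a sketch of that wiring: the ctc_loss helper and the metric constructor calls are assumptions, since neither appears in this diff.

import tensorflow as tf
from keras import backend as K

from mltu.metrics import CERMetric, WERMetric

def ctc_loss(y_true, y_pred):
    # Standard CTC loss via the Keras backend. For simplicity this assumes
    # every label fills the full padded width; a real training script would
    # compute per-sample label lengths instead.
    batch_len = tf.shape(y_true)[0]
    input_len = tf.fill([batch_len, 1], tf.shape(y_pred)[1])
    label_len = tf.fill([batch_len, 1], tf.shape(y_true)[1])
    return K.ctc_batch_cost(y_true, y_pred, input_len, label_len)

vocab = "abcdefg"  # hypothetical vocabulary, matching the metrics test above
model = train_model(input_dim=(96, 1408, 3), output_dim=len(vocab))
model.compile(
    optimizer="adam",
    loss=ctc_loss,
    # Assumed constructor signatures; the actual CERMetric/WERMetric API
    # is still being written in this commit.
    metrics=[CERMetric(vocab), WERMetric(vocab)],
)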
