pythonlessons
diff --git a/‎.vscode/settings.json‎
Lines changed: 10 additions & 1 deletion b/‎.vscode/settings.json‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎CHANGELOG.md‎
Lines changed: 16 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎Tests/test_metrics.py‎
Lines changed: 53 additions & 0 deletions b/‎Tests/test_metrics.py‎
Lines changed: 53 additions & 0 deletions
diff --git a/‎Tests/test_text_utils.py‎
Lines changed: 95 additions & 0 deletions b/‎Tests/test_text_utils.py‎
Lines changed: 95 additions & 0 deletions
diff --git a/‎Tutorials/02_captcha_to_text/train.py‎
Lines changed: 1 addition & 0 deletions b/‎Tutorials/02_captcha_to_text/train.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎Tutorials/03_handwriting_recognition/configs.py‎
Lines changed: 17 additions & 0 deletions b/‎Tutorials/03_handwriting_recognition/configs.py‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎Tutorials/03_handwriting_recognition/inferenceModel.py‎
Lines changed: 46 additions & 0 deletions b/‎Tutorials/03_handwriting_recognition/inferenceModel.py‎
Lines changed: 46 additions & 0 deletions
diff --git a/‎Tutorials/03_handwriting_recognition/model.py‎
Lines changed: 35 additions & 0 deletions b/‎Tutorials/03_handwriting_recognition/model.py‎
Lines changed: 35 additions & 0 deletions
@@ -1,3 +1,12 @@
 {
-    "python.analysis.typeCheckingMode": "off"
+    "python.analysis.typeCheckingMode": "off",
+    "python.testing.unittestArgs": [
+        "-v",
+        "-s",
+        "./Tests",
+        "-p",
+        "*test*.py"
+    ],
+    "python.testing.pytestEnabled": false,
+    "python.testing.unittestEnabled": true
 }
@@ -1,4 +1,20 @@
+## [0.1.5] - 2023-01-10
+
+### Changed
+- separated CWERMetric into CER and WER metrics in mltu.metrics; the character/word error rate was calculated incorrectly
+- created @setter for augmentors and transformers in DataProvider, to properly add augmentors and transformers to the pipeline
+- augmentors and transformers must inherit from `mltu.augmentors.base.Augmentor` and `mltu.transformers.base.Transformer` respectively
+- updated ImageShowCV2 transformer documentation
+- fixed OnnxInferenceModel in mltu.inferenceModel to use the CPU when the force_cpu=True flag is set, even if a GPU is available
+
+### Added:
+- added RandomSharpen to mltu.augmentors, used for simple image augmentation;
+- added ImageShowCV2 to mltu.transformers, used to show image with cv2 for debugging purposes;
+- added better explained documentation
+- created unittests for CER and WER in mltu.utils.text_utils and for the TensorFlow version of CER and WER in mltu.metrics
+
 ## [0.1.4] - 2022-12-21
+
 ### Added:
 - added mltu.augmentors (RandomBrightness, RandomRotate, RandomErodeDilate) - used for simple image augmentation;
 
 
@@ -0,0 +1,53 @@
+import unittest
+import numpy as np
+from mltu.metrics import CERMetric, WERMetric
+
+import numpy as np
+import tensorflow as tf
+
+class TestMetrics(unittest.TestCase):
+    """Unit tests for CERMetric.get_cer and WERMetric.get_wer from mltu.metrics."""
+
+    def to_embeddings(self, sentences, vocab):
+        """Encode every sentence as a list of character indices into ``vocab``.
+
+        Returns:
+            A tuple ``(embeddings, max_len)`` where ``embeddings`` holds one index list per
+            sentence and ``max_len`` is the length of the longest sentence (0 if there are none).
+        """
+        embeddings = [[vocab.index(character) for character in sentence] for sentence in sentences]
+        max_len = max((len(embedding) for embedding in embeddings), default=0)
+        return embeddings, max_len
+
+    def setUp(self) -> None:
+        """Build padded index sequences for a fixed set of true and predicted sentences."""
+        true_words = ['Who are you', 'I am a student', 'I am a teacher', 'Just different sentence length']
+        pred_words = ['Who are you', 'I am a ztudent', 'I am A reacher', 'Just different length']
+
+        # Sort the characters: set iteration order of strings changes between interpreter runs,
+        # sorting keeps the vocabulary (and therefore the index sequences) reproducible
+        self.vocab = "".join(sorted(set("".join(true_words + pred_words))))
+
+        sentence_true, max_len_true = self.to_embeddings(true_words, self.vocab)
+        sentence_pred, max_len_pred = self.to_embeddings(pred_words, self.vocab)
+
+        max_len = max(max_len_true, max_len_pred)
+        padding_length = 64
+
+        # True sentences are padded to the longest sentence with the index len(vocab);
+        # predictions are padded to a fixed, longer length with -1
+        self.sen_true = [np.pad(sen, (0, max_len - len(sen)), 'constant', constant_values=len(self.vocab)) for sen in sentence_true]
+        self.sen_pred = [np.pad(sen, (0, padding_length - len(sen)), 'constant', constant_values=-1) for sen in sentence_pred]
+
+    def test_CERMetric(self):
+        """The per-sentence character error rate matches the expected values."""
+        vocabulary = tf.constant(list(self.vocab))
+        cer = CERMetric.get_cer(self.sen_true, self.sen_pred, vocabulary).numpy()
+
+        expected = np.array([0.0, 0.071428575, 0.14285715, 0.42857143], dtype=np.float32)
+        # Compare with a tolerance: exact equality of float32 results is brittle across platforms
+        np.testing.assert_allclose(cer, expected, rtol=1e-6, atol=1e-7)
+
+    def test_WERMetric(self):
+        """The per-sentence word error rate matches the expected values."""
+        vocabulary = tf.constant(list(self.vocab))
+        wer = WERMetric.get_wer(self.sen_true, self.sen_pred, vocabulary).numpy()
+
+        expected = np.array([0., 0.25, 0.5, 0.33333334], dtype=np.float32)
+        # Compare with a tolerance: exact equality of float32 results is brittle across platforms
+        np.testing.assert_allclose(wer, expected, rtol=1e-6, atol=1e-7)
+
+if __name__ == "__main__":
+    unittest.main()
@@ -0,0 +1,95 @@
+import unittest
+
+from mltu.utils.text_utils import edit_distance, get_cer, get_wer 
+
+class TestTextUtils(unittest.TestCase):
+    """Unit tests for edit_distance, get_cer and get_wer from mltu.utils.text_utils."""
+
+    def test_edit_distance(self):
+        """ This unit test includes several test cases to cover different scenarios, including no errors, 
+        substitution errors, insertion errors (extra predicted token), deletion errors (missing predicted
+        tokens), and a more complex case with multiple errors. It also includes a test case for empty input.
+        """
+        # Test simple case with no errors
+        prediction_tokens = ['A', 'B', 'C']
+        reference_tokens = ['A', 'B', 'C']
+        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 0)
+
+        # Test simple case with one substitution error
+        prediction_tokens = ['A', 'B', 'D']
+        reference_tokens = ['A', 'B', 'C']
+        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 1)
+
+        # Test simple case with one insertion error (prediction has one extra token)
+        prediction_tokens = ['A', 'B', 'C', 'D']
+        reference_tokens = ['A', 'B', 'C']
+        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 1)
+
+        # Test simple case with one deletion error (prediction is missing the last token)
+        prediction_tokens = ['A', 'B', 'C']
+        reference_tokens = ['A', 'B', 'C', 'D']
+        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 1)
+
+        # Test a second deletion error with a shorter reference
+        prediction_tokens = ['A', 'B']
+        reference_tokens = ['A', 'B', 'C']
+        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 1)
+
+        # Test more complex case with multiple errors
+        prediction_tokens = ['A', 'B', 'C', 'D', 'E']
+        reference_tokens = ['A', 'C', 'B', 'F', 'E']
+        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 3)
+
+        # Test empty input
+        prediction_tokens = []
+        reference_tokens = []
+        self.assertEqual(edit_distance(prediction_tokens, reference_tokens), 0)
+
+    def test_get_cer(self):
+        """Character error rate for identical, partially wrong, empty and shorter predictions."""
+        # Test simple case with no errors
+        preds = ['A B C']
+        target = ['A B C']
+        self.assertEqual(get_cer(preds, target), 0)
+
+        # Test simple case with one character error
+        # (fractional expectations are compared with a tolerance instead of exact float equality)
+        preds = ['A B C']
+        target = ['A B D']
+        self.assertAlmostEqual(get_cer(preds, target), 1/5)
+
+        # Test simple case with multiple character errors
+        preds = ['A B C']
+        target = ['D E F']
+        self.assertAlmostEqual(get_cer(preds, target), 3/5)
+
+        # Test empty input
+        preds = []
+        target = []
+        self.assertEqual(get_cer(preds, target), 0)
+
+        # Test simple case with different word lengths
+        preds = ['ABC']
+        target = ['ABCDEFG']
+        self.assertAlmostEqual(get_cer(preds, target), 4/7)
+
+    def test_get_wer(self):
+        """Word error rate for identical, partially wrong, empty and shorter predictions."""
+        # Test simple case with no errors
+        preds = 'A B C'
+        target = 'A B C'
+        self.assertEqual(get_wer(preds, target), 0)
+
+        # Test simple case with one word error
+        # (fractional expectations are compared with a tolerance instead of exact float equality)
+        preds = 'A B C'
+        target = 'A B D'
+        self.assertAlmostEqual(get_wer(preds, target), 1/3)
+
+        # Test simple case with multiple word errors
+        preds = 'A B C'
+        target = 'D E F'
+        self.assertEqual(get_wer(preds, target), 1)
+
+        # Test empty input
+        preds = ""
+        target = ""
+        self.assertEqual(get_wer(preds, target), 0)
+
+        # Test simple case with different sentence lengths (inputs given as lists of sentences)
+        preds = ['ABC']
+        target = ['ABC DEF']
+        self.assertEqual(get_wer(preds, target), 1)
+
+if __name__ == '__main__':
+    unittest.main()
@@ -60,6 +60,7 @@ def download_and_unzip(url, extract_to='Datasets'):
 # Augment training data with random brightness, rotation and erode/dilate
 train_data_provider.augmentors = [RandomBrightness(), RandomRotate(), RandomErodeDilate()]
 
+# Creating TensorFlow model architecture
 model = train_model(
     input_dim = (configs.height, configs.width, 3),
     output_dim = len(configs.vocab),
 
@@ -0,0 +1,17 @@
+import stow
+from datetime import datetime
+
+from mltu.configs import BaseModelConfigs
+
+class ModelConfigs(BaseModelConfigs):
+    """Configuration values for the handwriting recognition tutorial."""
+
+    def __init__(self):
+        super().__init__()
+
+        # Each instance gets its own output folder named after the creation time (minute resolution)
+        timestamp = datetime.now().strftime("%Y%m%d%H%M")
+        self.model_path = stow.join('Models/03_handwriting_recognition', timestamp)
+
+        # Starts empty/zero here; presumably filled in by the training script -- TODO confirm
+        self.vocab = ''
+
+        # Image dimensions
+        self.height = 32
+        self.width = 128
+
+        # Starts at zero here; presumably filled in by the training script -- TODO confirm
+        self.max_text_length = 0
+
+        # Training hyperparameters
+        self.batch_size = 64
+        self.learning_rate = 0.001
+        self.train_epochs = 1000
+        self.train_workers = 20
@@ -0,0 +1,46 @@
+import cv2
+import typing
+import numpy as np
+
+from mltu.inferenceModel import OnnxInferenceModel
+from mltu.utils.text_utils import ctc_decoder, get_cer
+
+class ImageToWordModel(OnnxInferenceModel):
+    """ONNX inference model that decodes its output into text with ``ctc_decoder``."""
+
+    def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
+        """Store the characters used for decoding; all other arguments go to OnnxInferenceModel."""
+        super().__init__(*args, **kwargs)
+        self.char_list = char_list
+
+    def predict(self, image: np.ndarray):
+        """Run the model on a single image and return the first decoded text."""
+        # cv2.resize expects (width, height), hence the reversed slice
+        # NOTE(review): assumes input_shape starts with (height, width) -- confirm in OnnxInferenceModel
+        target_size = self.input_shape[:2][::-1]
+        resized = cv2.resize(image, target_size)
+
+        # Add a leading batch axis and cast to float32 before feeding the ONNX session
+        batch = np.expand_dims(resized, axis=0).astype(np.float32)
+
+        outputs = self.model.run(None, {self.input_name: batch})
+
+        # Only one image was passed in, so take the first decoded entry
+        return ctc_decoder(outputs[0], self.char_list)[0]
+
+if __name__ == "__main__":
+    # Evaluate a trained model on the validation split and report the character error rate
+    import pandas as pd
+    from tqdm import tqdm
+    from mltu.configs import BaseModelConfigs
+
+    configs = BaseModelConfigs.load("Models/03_handwriting_recognition/202212290905/configs.yaml")
+
+    model = ImageToWordModel(model_path=configs.model_path, char_list=configs.vocab)
+
+    # Each row of val.csv is unpacked below as (image_path, label)
+    df = pd.read_csv("Models/03_handwriting_recognition/202212290905/val.csv").values.tolist()
+
+    accum_cer = []
+    for image_path, label in tqdm(df):
+        image = cv2.imread(image_path)
+
+        # cv2.imread does not raise on a missing or unreadable file, it returns None;
+        # skip such rows instead of failing later inside cv2.resize
+        if image is None:
+            print(f"Skipping unreadable image: {image_path}")
+            continue
+
+        prediction_text = model.predict(image)
+
+        cer = get_cer(prediction_text, label)
+        print(f"Image: {image_path}, Label: {label}, Prediction: {prediction_text}, CER: {cer}")
+
+        accum_cer.append(cer)
+
+    # Averaging an empty list would only produce nan and a runtime warning
+    if accum_cer:
+        print(f"Average CER: {np.average(accum_cer)}")
+    else:
+        print("No images could be evaluated")
@@ -0,0 +1,35 @@
+from keras import layers
+from keras.models import Model
+
+from mltu.model_utils import residual_block
+
+def train_model(input_dim, output_dim, activation='leaky_relu', dropout=0.2):
+    """Build the Keras model: a stack of residual blocks, a bidirectional LSTM and a softmax output.
+
+    Args:
+        input_dim: shape passed to the Keras ``Input`` layer.
+        output_dim: the output layer has ``output_dim + 1`` units
+            (the extra unit is presumably the CTC blank token -- TODO confirm).
+        activation: activation name forwarded to every residual block.
+        dropout: dropout rate forwarded to every residual block and applied after the LSTM.
+
+    Returns:
+        The assembled, uncompiled ``keras.models.Model``.
+    """
+    inputs = layers.Input(shape=input_dim, name="input")
+
+    # normalize images here instead in preprocessing step
+    normalized = layers.Lambda(lambda x: x / 255)(inputs)
+
+    # (filters, skip_conv, strides) for each residual block, applied in this order
+    block_specs = (
+        (16, True, 1),
+        (16, True, 2),
+        (16, False, 1),
+        (32, True, 2),
+        (32, False, 1),
+        (64, True, 2),
+        (64, True, 1),
+        (64, False, 1),
+        (64, False, 1),
+    )
+
+    features = normalized
+    for filters, skip_conv, strides in block_specs:
+        features = residual_block(features, filters, activation=activation, skip_conv=skip_conv, strides=strides, dropout=dropout)
+
+    # Merge the two axes before the last one into a single sequence axis for the LSTM
+    squeezed = layers.Reshape((features.shape[-3] * features.shape[-2], features.shape[-1]))(features)
+
+    blstm = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(squeezed)
+    blstm = layers.Dropout(dropout)(blstm)
+
+    output = layers.Dense(output_dim + 1, activation='softmax', name="output")(blstm)
+
+    model = Model(inputs=inputs, outputs=output)
+    return model
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,12 @@`
`1`	`1`	`{`
`2`		`- "python.analysis.typeCheckingMode": "off"`
	`2`	`+ "python.analysis.typeCheckingMode": "off",`
	`3`	`+ "python.testing.unittestArgs": [`
	`4`	`+ "-v",`
	`5`	`+ "-s",`
	`6`	`+ "./Tests",`
	`7`	`+ "-p",`
	`8`	`+ "*test*.py"`
	`9`	`+ ],`
	`10`	`+ "python.testing.pytestEnabled": false,`
	`11`	`+ "python.testing.unittestEnabled": true`
`3`	`12`	`}`