Skip to content

Commit 993dd06

Browse files
Merge pull request #1 from pythonlessons/feature/initial-files
Feature/initial files
2 parents c962435 + d7548ed commit 993dd06

28 files changed

+974
-2
lines changed

.gitignore

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
__pycache__
2+
*.egg-info
3+
*.pyc
4+
venv
5+
6+
Datasets/*
7+
Models/*
8+
9+
!*.md

.vscode/launch.json

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
// Use IntelliSense to learn about possible attributes.
3+
// Hover to view descriptions of existing attributes.
4+
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5+
"version": "0.2.0",
6+
"configurations": [
7+
{
8+
"name": "Python: Current File",
9+
"type": "python",
10+
"request": "launch",
11+
"program": "${file}",
12+
"console": "integratedTerminal",
13+
"justMyCode": false
14+
}
15+
]
16+
}

.vscode/settings.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"python.analysis.typeCheckingMode": "off"
3+
}

Datasets/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Empty directory to hold the datasets when running the tutorials

Models/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Empty directory to hold the models when running the tutorials

README.md

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,14 @@
1-
# mltu
2-
Machine Learning Training Utilities (for TensorFlow)
1+
# MLTU - Machine Learning Training Utilities (TensorFlow)
2+
Machine Learning Training Utilities with TensorFlow 2.* and Python 3
3+
4+
## Installation:
5+
Clone the repository, then run the install command from inside the cloned `mltu` directory:
6+
```bash
7+
git clone https://github.com/pythonlessons/mltu.git
8+
```
9+
```bash
10+
pip install .
11+
```
12+
13+
## Tutorials and Examples:
14+
...

Tests/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Directory for unit tests
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Text Recognition With TensorFlow and CTC network
2+
In this tutorial, we will explore how to recognize text in images using TensorFlow and a neural network model trained with CTC loss.
3+
4+
## Introduction
5+
...
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import stow
2+
from datetime import datetime
3+
4+
from mltu.configs import BaseModelConfigs
5+
6+
class ModelConfigs(BaseModelConfigs):
    """Configuration values for the image-to-word tutorial.

    All settings are plain instance attributes assigned in ``__init__``
    after the base class has been initialised.
    """

    def __init__(self):
        super().__init__()

        # Every instantiation points at a folder named after the current
        # local time with minute resolution (YYYYmmddHHMM).
        run_stamp = datetime.now().strftime("%Y%m%d%H%M")
        self.model_path = stow.join('Models/1_image_to_word', run_stamp)

        # Digits followed by upper-case and lower-case ASCII letters.
        self.vocab = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'

        # NOTE(review): presumably the model input image size in pixels - confirm.
        self.height = 32
        self.width = 128

        # NOTE(review): presumably the longest label the model handles - confirm.
        self.max_text_length = 23

        # Training hyper-parameters.
        self.batch_size = 1024
        self.learning_rate = 1e-4
        self.train_epochs = 100
        self.train_workers = 20
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import cv2
2+
import typing
3+
import numpy as np
4+
5+
from mltu.inferenceModel import OnnxInferenceModel
6+
from mltu.utils.text_utils import ctc_decoder, get_cer
7+
8+
class ImageToWordModel(OnnxInferenceModel):
    """Inference model that maps an image to text via ``ctc_decoder``."""

    def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
        """Initialise the base model and remember the vocabulary.

        Args:
            char_list: Characters handed to ``ctc_decoder`` when decoding
                the model output.
            *args: Forwarded unchanged to ``OnnxInferenceModel``.
            **kwargs: Forwarded unchanged to ``OnnxInferenceModel``.
        """
        super().__init__(*args, **kwargs)
        self.char_list = char_list

    def predict(self, image: np.ndarray):
        """Run the model on a single image and return the decoded text.

        Args:
            image: Image array to recognise.

        Returns:
            The first entry of the ``ctc_decoder`` result for this image.
        """
        # cv2.resize takes its target size as (width, height), hence the
        # reversal of the first two entries of ``input_shape``.
        # NOTE(review): assumes input_shape starts with (height, width) - confirm.
        target_size = self.input_shape[:2][::-1]
        resized = cv2.resize(image, target_size)

        # Add a leading axis of size one and convert to float32 before
        # handing the array to the model.
        batch = np.expand_dims(resized, axis=0).astype(np.float32)

        outputs = self.model.run(None, {self.input_name: batch})

        # Only the first model output is decoded; a single image was passed,
        # so only the first decoded entry is returned.
        decoded = ctc_decoder(outputs[0], self.char_list)
        return decoded[0]
23+
24+
25+
if __name__ == "__main__":
    # Evaluation script: runs the exported model on the first 20 rows of the
    # validation CSV and reports the character error rate (CER) per image
    # and on average.
    import pandas as pd
    from tqdm import tqdm
    from mltu.configs import BaseModelConfigs

    configs = BaseModelConfigs.load("Models/1_image_to_word/202211270035/configs.yaml")

    model = ImageToWordModel(model_path=configs.model_path, char_list=configs.vocab)

    # Rows containing missing values are dropped; each remaining row is
    # unpacked below as (image_path, label).
    df = pd.read_csv("Models/1_image_to_word/202211270035/val.csv").dropna().values.tolist()

    accum_cer = []
    for image_path, label in tqdm(df[:20]):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning
            # None rather than raising, so check it explicitly.
            print(f"Skipping {image_path}: image could not be read")
            continue

        try:
            prediction_text = model.predict(image)
            cer = get_cer(prediction_text, label)
        except Exception as error:
            # Best-effort evaluation: report the failing sample and move on.
            # `Exception` (not a bare except) keeps Ctrl+C and SystemExit
            # working as expected.
            print(f"Skipping {image_path}: {type(error).__name__}: {error}")
            continue

        print(f"Image: {image_path}, Label: {label}, Prediction: {prediction_text}, CER: {cer}")

        # Optional visualisation - uncomment to show each image enlarged
        # three times with the prediction as the window title:
        # image = cv2.resize(image, (image.shape[1] * 3, image.shape[0] * 3))
        # cv2.imshow(prediction_text, image)
        # cv2.waitKey(0)
        # cv2.destroyAllWindows()

        accum_cer.append(cer)

    # np.average of an empty list yields nan plus a RuntimeWarning, so the
    # "nothing evaluated" case is reported explicitly instead.
    if accum_cer:
        print(f"Average CER: {np.average(accum_cer)}")
    else:
        print("Average CER: n/a (no samples were evaluated)")

0 commit comments

Comments
 (0)