Skip to content

Commit 940912e

Browse files
committed
CWER metric fix, dataprovider improved, better documentation
1 parent 85a93cc commit 940912e

File tree

5 files changed

+262
-73
lines changed

5 files changed

+262
-73
lines changed

CHANGELOG.md

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
1-
## [0.1.5] - 2022-12-29
1+
## [0.1.5] - 2023-01-03
22

33
### Changed
44
- changed CWERMetric in mltu.metrics, character/word error rate was calculated incorrectly
5+
- created @setter for augmentors and transformers in DataProvider, to properly add augmentors and transformers to the pipeline
6+
- augmentors and transformers must inherit from `mltu.augmentors.base.Augmentor` and `mltu.transformers.base.Transformer` respectively
7+
- added better explained documentation
8+
9+
### Added
10+
- added RandomSharpen to mltu.augmentors, used for simple image augmentation;
11+
- added ImageShowCV2 to mltu.transformers, used to show image with cv2 for debugging purposes;
512

613
## [0.1.4] - 2022-12-21
714

Tutorials/03_handwriting_recognition/train.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66

77
from mltu.dataProvider import DataProvider
88
from mltu.preprocessors import ImageReader
9-
from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding
10-
from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate
9+
from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding, ImageShowCV2
10+
from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen
1111
from mltu.losses import CTCloss
1212
from mltu.callbacks import Model2onnx, TrainLogger
1313
from mltu.metrics import CWERMetric
@@ -82,15 +82,20 @@ def download_and_unzip(url, extract_to='Datasets', chunk_size=1024*1024):
8282
transformers=[
8383
ImageResizer(configs.width, configs.height, keep_aspect_ratio=False),
8484
LabelIndexer(configs.vocab),
85-
LabelPadding(max_word_length=configs.max_text_length, padding_value=len(configs.vocab))
85+
LabelPadding(max_word_length=configs.max_text_length, padding_value=len(configs.vocab)),
8686
],
8787
)
8888

8989
# Split the dataset into training and validation sets
9090
train_data_provider, val_data_provider = data_provider.split(split = 0.9)
9191

9292
# Augment training data with random brightness, erode/dilate, sharpening and rotation
93-
train_data_provider.augmentors = [RandomBrightness(), RandomRotate(), RandomErodeDilate()]
93+
train_data_provider.augmentors = [
94+
RandomBrightness(),
95+
RandomErodeDilate(),
96+
RandomSharpen(),
97+
RandomRotate(angle=10),
98+
]
9499

95100
# Creating TensorFlow model architecture
96101
model = train_model(

mltu/augmentors.py

Lines changed: 170 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,16 @@
66
- RandomBrightness
77
- RandomRotate
88
- RandomErodeDilate
9+
- RandomSharpen
910
"""
1011

1112
class Augmentor:
1213
""" Object that should be inherited by all augmentors
13-
Args:
14-
image (np.ndarray): Image to augment
15-
annotation (np.ndarray): Annotation to augment
1614
17-
Returns:
18-
typing.Tuple[np.ndarray, np.ndarray]: Augmented image and mask
15+
Args:
16+
random_chance (float, optional): Chance of applying the augmentor. Where 0.0 is never and 1.0 is always. Defaults to 0.5.
1917
"""
2018
def __init__(self, random_chance: float=0.5) -> None:
21-
"""
22-
Args:
23-
random_chance (float, optional): Chance of applying the augmentor. Defaults to 0.5.
24-
"""
2519
self._random_chance = random_chance
2620

2721
def __call__(self, image: np.ndarray, annotation: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
@@ -33,27 +27,32 @@ def __call__(self, image: np.ndarray, annotation: np.ndarray) -> typing.Tuple[np
3327
class RandomBrightness(Augmentor):
3428
""" Randomly adjust image brightness
3529
36-
Args:
37-
image (np.ndarray): Image to be adjusted
38-
annotation (np.ndarray): Annotation to be adjusted
39-
40-
Returns:
41-
image (np.ndarray): Adjusted image
42-
annotation (np.ndarray): Adjusted annotation
30+
Attributes:
31+
random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
32+
delta (int): Integer value for brightness adjustment
4333
"""
44-
def __init__(self, random_chance:float=0.5, delta:int=100)->None:
45-
"""
46-
Args:
47-
random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability
48-
delta (int): Integer value for brightness adjustment
49-
"""
50-
assert delta >= 0.0
51-
assert delta <= 255.0
34+
def __init__(
35+
self,
36+
delta: int = 100,
37+
*args, **kwargs
38+
) -> None:
39+
super(RandomBrightness, self).__init__(*args, **kwargs)
40+
41+
assert 0 <= delta <= 255.0, "Delta must be between 0.0 and 255.0"
5242

53-
self._random_chance = random_chance
5443
self._delta = delta
5544

56-
def __call__(self, image:np.ndarray, annotation:np.ndarray)->typing.Tuple[np.ndarray, np.ndarray]:
45+
def __call__(self, image: np.ndarray, annotation: np.ndarray) -> typing.Tuple[np.ndarray, np.ndarray]:
46+
""" Randomly adjust image brightness
47+
48+
Args:
49+
image (np.ndarray): Image to be adjusted
50+
annotation (np.ndarray): Mask to be adjusted
51+
52+
Returns:
53+
image (np.ndarray): Adjusted image
54+
annotation (np.ndarray): Adjusted mask
55+
"""
5756
if np.random.rand() <= self._random_chance:
5857

5958
image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
@@ -74,58 +73,112 @@ def __call__(self, image:np.ndarray, annotation:np.ndarray)->typing.Tuple[np.nda
7473
class RandomRotate(Augmentor):
7574
""" Randomly rotate image
7675
77-
Args:
78-
image (np.ndarray): Image to be rotated
79-
annotation (np.ndarray): Annotation to be rotated
80-
81-
Returns:
82-
image (np.ndarray): Rotated image
83-
annotation (np.ndarray): Rotated annotation
76+
Attributes:
77+
random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
78+
angle (int): Angle between 0 and 180, setting image rotation borders
79+
borderValue (tuple): Tuple of 3 integers, setting border color for image rotation
80+
crop_borders (bool): Boolean value, setting if borders should be cropped after rotation
8481
"""
85-
def __init__(self, random_chance:float=0.5, angle:int=10, borderValue:typing.Tuple[int, int, int]=(255, 255, 255))->None:
86-
"""
87-
Args:
88-
random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability
89-
angle (int): Integer value for rotation angle, in degrees
90-
borderValue (tuple): Tuple of 3 integers, setting border color for image rotation
91-
"""
92-
self._random_chance = random_chance
82+
def __init__(
83+
self,
84+
angle: int=30,
85+
borderValue: typing.Tuple[int, int, int]=None,
86+
crop_borders: bool=False,
87+
*args, **kwargs
88+
) -> None:
89+
super(RandomRotate, self).__init__(*args, **kwargs)
90+
9391
self._angle = angle
9492
self._borderValue = borderValue
93+
self._crop_borders = crop_borders
9594

9695
def __call__(self, image:np.ndarray, annotation:np.ndarray)->typing.Tuple[np.ndarray, np.ndarray]:
96+
""" Randomly rotate image
97+
98+
Args:
99+
image (np.ndarray): Image to be rotated
100+
annotation (np.ndarray): Mask to be rotated
101+
102+
Returns:
103+
image (np.ndarray): Rotated image
104+
annotation (np.ndarray): Rotated mask
105+
"""
97106
if np.random.rand() <= self._random_chance:
98107

99108
angle = np.random.uniform(-self._angle, self._angle)
100109

110+
# generate random border color
111+
borderValue = np.random.randint(0, 255, 3) if self._borderValue is None else self._borderValue
112+
borderValue = [int(v) for v in borderValue]
113+
101114
h, w, _ = image.shape
102-
m = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
103-
image = cv2.warpAffine(image, m, (w, h), borderValue=self._borderValue)
104-
# Check if annotation is image mask
105-
if not isinstance(annotation, str):
106-
annotation = cv2.warpAffine(annotation, m, (w, h), borderValue=self._borderValue)
115+
if self._crop_borders:
116+
m = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
117+
image = cv2.warpAffine(image, m, (w, h), borderValue=borderValue)
118+
# Check if annotation is image mask
119+
if not isinstance(annotation, str):
120+
annotation = cv2.warpAffine(annotation, m, (w, h), borderValue=0)
121+
122+
else:
123+
diagonal = round(np.sqrt((w*w) + (h*h)))
124+
top, bottom, left, right = round((diagonal-h) / 2), round((diagonal-h) / 2), round((diagonal-w) / 2), round((diagonal-w) / 2)
125+
padded_image = cv2.copyMakeBorder(image, top, bottom, left, right, borderType=cv2.BORDER_CONSTANT, value = borderValue)
126+
127+
padded_height, padded_width = padded_image.shape[:2]
128+
129+
transform_matrix = cv2.getRotationMatrix2D((padded_height/2, padded_width/2), angle, 1.0)
130+
131+
rotated_image = cv2.warpAffine(padded_image, transform_matrix, (diagonal, diagonal), flags=cv2.INTER_LANCZOS4, borderValue=borderValue)
132+
133+
# Find the indices of the non-black pixels in the image
134+
indices = np.argwhere(rotated_image != np.array(borderValue))
135+
136+
# Find the minimum and maximum row and column indices of the non-black pixels
137+
min_row, max_row = indices[:, 0].min(), indices[:, 0].max()
138+
min_col, max_col = indices[:, 1].min(), indices[:, 1].max()
139+
140+
# Crop the black borders from the image
141+
cropped_image = rotated_image[min_row:max_row+1, min_col:max_col+1]
142+
143+
if not isinstance(annotation, str):
144+
145+
padded_annotation = cv2.copyMakeBorder(annotation, top, bottom, left, right, borderType=cv2.BORDER_CONSTANT, value=0)
146+
147+
rotated_annotation = cv2.warpAffine(padded_annotation, transform_matrix, (diagonal, diagonal), flags=cv2.INTER_LANCZOS4, borderValue=0)
148+
149+
cropped_annotation = rotated_annotation[min_row:max_row+1, min_col:max_col+1]
150+
151+
return cropped_image, cropped_annotation
152+
153+
return cropped_image, annotation
107154

108155
return image, annotation
109156

110-
class RandomErodeDilate:
157+
class RandomErodeDilate(Augmentor):
111158
""" Randomly erode and dilate image
112159
113-
Args:
114-
image (np.ndarray): Image to be eroded and dilated
115-
116-
Returns:
117-
image (np.ndarray): Eroded and dilated image
160+
Attributes:
161+
random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
162+
kernel_size (tuple): Tuple of 2 integers, setting kernel size for erosion and dilation
118163
"""
119-
def __init__(self, random_chance:float=0.5, kernel_size:typing.Tuple[int, int]=(1, 1))->None:
120-
"""
121-
Args:
122-
random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability
123-
kernel_size (tuple): Tuple of 2 integers, setting kernel size for erosion and dilation
124-
"""
125-
self._random_chance = random_chance
164+
def __init__(
165+
self,
166+
kernel_size: typing.Tuple[int, int]=(1, 1),
167+
*args, **kwargs
168+
) -> None:
169+
super(RandomErodeDilate, self).__init__(*args, **kwargs)
170+
126171
self._kernel_size = kernel_size
127172

128173
def __call__(self, image:np.ndarray, annotation)->typing.Tuple[np.ndarray, np.ndarray]:
174+
""" Randomly erode and dilate image
175+
176+
Args:
177+
image (np.ndarray): Image to be eroded and dilated
178+
179+
Returns:
180+
image (np.ndarray): Eroded and dilated image
181+
"""
129182
if np.random.rand() <= self._random_chance:
130183

131184
kernel = np.ones(self._kernel_size, np.uint8)
@@ -135,4 +188,62 @@ def __call__(self, image:np.ndarray, annotation)->typing.Tuple[np.ndarray, np.nd
135188
else:
136189
image = cv2.dilate(image, kernel, iterations=1)
137190

191+
return image, annotation
192+
193+
class RandomSharpen(Augmentor):
194+
""" Randomly sharpen image
195+
196+
Attributes:
197+
alpha (float): Float between 0.0 and 1.0 setting bounds for random probability
198+
lightness_range (tuple): Tuple of 2 floats, setting bounds for random lightness change
199+
kernel (np.ndarray): Numpy array of kernel for image convolution
200+
kernel_anchor (np.ndarray): Numpy array of kernel anchor for image convolution
201+
random_chance (float, optional): Chance of applying the augmentor, where 1.0 is always and 0.0 is never. Defaults to 0.5.
202+
"""
203+
def __init__(
204+
self,
205+
alpha: float = 0.25,
206+
lightness_range: typing.Tuple = (0.75, 2.0),
207+
kernel: np.ndarray = None,
208+
kernel_anchor: np.ndarray = None,
209+
*args, **kwargs
210+
) -> None:
211+
super(RandomSharpen, self).__init__(*args, **kwargs)
212+
213+
self._alpha_range = (alpha, 1.0)
214+
self._ligtness_range = lightness_range
215+
self._lightness_anchor = 8
216+
217+
self._kernel = np.array([[-1, -1, -1], [-1, 1, -1], [-1, -1, -1]], dtype=np.float32) if kernel is None else kernel
218+
self._kernel_anchor = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32) if kernel_anchor is None else kernel_anchor
219+
220+
assert 0 <= alpha <= 1.0, "Alpha must be between 0.0 and 1.0"
221+
222+
def __call__(self, image: np.ndarray, annotation) -> typing.Tuple[np.ndarray, np.ndarray]:
223+
""" Randomly sharpen image
224+
225+
Args:
226+
image (np.ndarray): Image to be sharpened
227+
228+
Returns:
229+
image (np.ndarray): Sharpened image
230+
"""
231+
if np.random.rand() <= self._random_chance:
232+
233+
lightness = np.random.uniform(*self._ligtness_range)
234+
alpha = np.random.uniform(*self._alpha_range)
235+
236+
kernel = self._kernel_anchor * (self._lightness_anchor + lightness) + self._kernel
237+
kernel -= self._kernel_anchor
238+
kernel = (1 - alpha) * self._kernel_anchor + alpha * kernel
239+
240+
# Apply sharpening to each channel
241+
r, g, b = cv2.split(image)
242+
r_sharp = cv2.filter2D(r, -1, kernel)
243+
g_sharp = cv2.filter2D(g, -1, kernel)
244+
b_sharp = cv2.filter2D(b, -1, kernel)
245+
246+
# Merge the sharpened channels back into the original image
247+
image = cv2.merge([r_sharp, g_sharp, b_sharp])
248+
138249
return image, annotation

0 commit comments

Comments
 (0)