
Commit 4bc9edc
Merge branch 'develop'
2 parents 42aa5c8 + b4181cc

File tree: 10 files changed, +170 -29 lines changed


CHANGELOG.md

Lines changed: 9 additions & 0 deletions
@@ -1,3 +1,12 @@
+## [1.2.4] - 2024-03-21
+### Added
+- Added `RandomElasticTransform` to `mltu.augmentors` to work with `Image` objects
+- Added `xyxy_abs` to `mltu.annotations.detections.Detection` object to return absolute bounding boxes
+
+### Changes
+- Changed `ImageShowCV2` transformer in `mltu.transformers` to display images when running with multiple threads
+
+
 ## [1.2.3] - 2024-03-17
 ### Added
 - Added Tutorial how to run YOLOv8 pretrained Object Detection model `Tutorials.11_Yolov8.README.md`

Tutorials/02_captcha_to_text/train.py

Lines changed: 2 additions & 2 deletions
@@ -86,11 +86,11 @@ def download_and_unzip(url, extract_to="Datasets"):
 os.makedirs(configs.model_path, exist_ok=True)
 
 # Define callbacks
-earlystopper = EarlyStopping(monitor="val_CER", patience=50, verbose=1)
+earlystopper = EarlyStopping(monitor="val_CER", patience=50, verbose=1, mode="min")
 checkpoint = ModelCheckpoint(f"{configs.model_path}/model.h5", monitor="val_CER", verbose=1, save_best_only=True, mode="min")
 trainLogger = TrainLogger(configs.model_path)
 tb_callback = TensorBoard(f"{configs.model_path}/logs", update_freq=1)
-reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=20, verbose=1, mode="auto")
+reduceLROnPlat = ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=20, verbose=1, mode="min")
 model2onnx = Model2onnx(f"{configs.model_path}/model.h5")
 
 # Train the model
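Note on the fix: `val_CER` is a character error rate, so lower is better. With `mode="auto"` Keras infers the optimization direction from the monitor's name, and a custom name like `val_CER` is not one it recognizes, so the explicit `mode="min"` states the intent directly instead of relying on name-based guessing. A minimal sketch of the difference, assuming plain `tf.keras`:

```python
# Minimal sketch, assuming tf.keras: "min" states the direction outright,
# while "auto" guesses it from the monitor name.
from tensorflow.keras.callbacks import EarlyStopping

explicit = EarlyStopping(monitor="val_CER", patience=50, mode="min")   # stop when CER stops decreasing
implicit = EarlyStopping(monitor="val_CER", patience=50, mode="auto")  # relies on name-based inference
```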

Tutorials/11_Yolov8/README.md

Lines changed: 0 additions & 1 deletion
@@ -105,7 +105,6 @@ onnx.save(onnx_model, "yolov8m.onnx")
 ## Run the YOLOv8 ONNX model with ONNX Runtime:
 ```python
 import cv2
-from ultralytics.engine.model import Model as BaseModel
 from mltu.torch.yolo.detectors.onnx_detector import Detector as OnnxDetector
 
 input_width, input_height = 640, 640

mltu/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-__version__ = "1.2.3"
+__version__ = "1.2.4"
 
 from .annotations.images import Image
 from .annotations.images import CVImage

mltu/annotations/detections.py

Lines changed: 7 additions & 0 deletions
@@ -82,6 +82,10 @@ def xywh(self, xywh: np.ndarray):
     @property
     def xyxy(self):
         return self._xyxy
+
+    @property
+    def xyxy_abs(self):
+        return (self.xyxy * np.array([self.width, self.height, self.width, self.height])).astype(int)
 
     @staticmethod
     def xywh2xyxy(xywh: np.ndarray):
@@ -275,6 +279,9 @@ def validate(self):
         if isinstance(self.labels, list):
             self.labels = {i: label for i, label in enumerate(self.labels)}
 
+        if not self.labels:
+            self.labels = {k: v for k, v in enumerate(sorted(set([detection.label for detection in self.detections])))}
+
     def applyToFrame(self, image: np.ndarray, **kwargs: dict) -> np.ndarray:
         """ Draw the detections on the image """
         for detection in self.detections:
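Note on `xyxy_abs`: it converts the relative `xyxy` coordinates into absolute pixel coordinates by scaling with the frame size. A quick illustration with hypothetical values (the constructor keywords follow the `Detection(...)` call visible in the `mltu/augmentors.py` diff below):

```python
# Hypothetical example: a detection stored with relative coordinates on a
# 640x480 frame. xyxy_abs scales by (width, height, width, height) and casts to int.
from mltu.annotations.detections import Detection, BboxType

det = Detection(
    [0.25, 0.50, 0.75, 1.00],  # relative x_min, y_min, x_max, y_max
    label="car",
    width=640,
    height=480,
    relative=True,
    bbox_type=BboxType.XYXY,
)
print(det.xyxy_abs)  # -> [160 240 480 480]
```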

mltu/augmentors.py

Lines changed: 109 additions & 4 deletions
@@ -22,6 +22,7 @@
 - RandomMosaic
 - RandomZoom
 - RandomColorMode
+- RandomElasticTransform
 
 Implemented audio augmentors:
 - RandomAudioNoise
@@ -494,14 +495,14 @@ def __init__(
         self,
         random_chance: float = 0.5,
         log_level: int = logging.INFO,
-        augment_annotation: bool = False,
+        augment_annotation: bool = True,
     ) -> None:
         """ Randomly mirror image
 
         Args:
             random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
             log_level (int): Log level for the augmentor. Defaults to logging.INFO.
-            augment_annotation (bool): Whether to augment the annotation. Defaults to False.
+            augment_annotation (bool): Whether to augment the annotation. Defaults to True.
         """
         super(RandomMirror, self).__init__(random_chance, log_level, augment_annotation)
 
@@ -534,14 +535,14 @@ def __init__(
         self,
         random_chance: float = 0.5,
         log_level: int = logging.INFO,
-        augment_annotation: bool = False,
+        augment_annotation: bool = True,
     ) -> None:
         """ Randomly mirror image
 
         Args:
             random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
             log_level (int): Log level for the augmentor. Defaults to logging.INFO.
-            augment_annotation (bool): Whether to augment the annotation. Defaults to False.
+            augment_annotation (bool): Whether to augment the annotation. Defaults to True.
         """
         super(RandomFlip, self).__init__(random_chance, log_level, augment_annotation)
 
@@ -839,6 +840,110 @@ def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
         return image, annotation
 
 
+class RandomElasticTransform(Augmentor):
+    """ Randomly apply elastic transform to an image
+
+    Attributes:
+        random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
+        alpha_range (tuple): Tuple of 2 floats, setting bounds for random alpha value. Defaults to (0, 0.1).
+        sigma_range (tuple): Tuple of 2 floats, setting bounds for random sigma value. Defaults to (0.01, 0.02).
+        log_level (int): Log level for the augmentor. Defaults to logging.INFO.
+        augment_annotation (bool): Whether to augment the annotation. Defaults to True.
+    """
+    def __init__(
+        self,
+        random_chance: float = 0.5,
+        alpha_range: tuple = (0, 0.1),
+        sigma_range: tuple = (0.01, 0.02),
+        log_level: int = logging.INFO,
+        augment_annotation: bool = True,
+    ) -> None:
+        super(RandomElasticTransform, self).__init__(random_chance, log_level, augment_annotation)
+        self.alpha_range = alpha_range
+        self.sigma_range = sigma_range
+
+    @staticmethod
+    def elastic_transform(image: np.ndarray, alpha: float, sigma: float) -> typing.Tuple[np.ndarray, np.ndarray, np.ndarray]:
+        """ Apply elastic transform to an image
+
+        Args:
+            image (np.ndarray): Image to be used for elastic transform
+            alpha (float): Alpha value for elastic transform
+            sigma (float): Sigma value for elastic transform
+
+        Returns:
+            remap_fn (np.ndarray): Elastic transformed image
+            dx (np.ndarray): X-axis displacement
+            dy (np.ndarray): Y-axis displacement
+        """
+        height, width, channels = image.shape
+        dx = np.random.rand(height, width).astype(np.float32) * 2 - 1
+        dy = np.random.rand(height, width).astype(np.float32) * 2 - 1
+
+        cv2.GaussianBlur(dx, (0, 0), sigma, dst=dx)
+        cv2.GaussianBlur(dy, (0, 0), sigma, dst=dy)
+
+        dx *= alpha
+        dy *= alpha
+
+        x, y = np.meshgrid(np.arange(width), np.arange(height))
+
+        map_x = np.float32(x + dx)
+        map_y = np.float32(y + dy)
+
+        remap_fn = cv2.remap(image, map_x, map_y, interpolation=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT)
+
+        return remap_fn, dx, dy
+
+    @randomness_decorator
+    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
+        """ Randomly apply elastic transform to an image
+
+        Args:
+            image (Image): Image to be used for elastic transform
+            annotation (typing.Any): Annotation to be used for elastic transform
+
+        Returns:
+            image (Image): Elastic transformed image
+            annotation (typing.Any): Elastic transformed annotation if necessary
+        """
+        alpha = image.width * np.random.uniform(*self.alpha_range)
+        sigma = image.width * np.random.uniform(*self.sigma_range)
+        new_image, dx, dy = self.elastic_transform(image.numpy(), alpha, sigma)
+        image.update(new_image)
+
+        if isinstance(annotation, Detections) and self._augment_annotation:
+            detections = []
+            for detection in annotation:
+                x_min, y_min, x_max, y_max = detection.xyxy_abs
+                new_x_min = min(max(0, x_min + dx[y_min, x_min]), image.width - 1)
+                new_y_min = min(max(0, y_min + dy[y_min, x_min]), image.height - 1)
+                new_x_max = min(max(0, x_max + dx[y_max, x_max]), image.width - 1)
+                new_y_max = min(max(0, y_max + dy[y_max, x_max]), image.height - 1)
+                detections.append(
+                    Detection(
+                        [new_x_min, new_y_min, new_x_max, new_y_max],
+                        label=detection.label,
+                        labels=detection.labels,
+                        confidence=detection.confidence,
+                        image_path=detection.image_path,
+                        width=image.width,
+                        height=image.height,
+                        relative=False,
+                        bbox_type=BboxType.XYXY
+                    )
+                )
+
+            annotation = Detections(
+                labels=annotation.labels,
+                width=image.width,
+                height=image.height,
+                detections=detections
+            )
+
+        return image, annotation
+
+
 class RandomAudioNoise(Augmentor):
     """ Randomly add noise to audio

mltu/tensorflow/metrics.py

Lines changed: 3 additions & 3 deletions
@@ -55,7 +55,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
         self.cer_accumulator.assign_add(tf.reduce_sum(distance))
 
         # Increment the batch_counter by the batch size
-        self.batch_counter.assign_add(len(y_true))
+        self.batch_counter.assign_add(input_shape[0])
 
         # Calculate the number of wrong words in batch and add to wer_accumulator variable
         self.wer_accumulator.assign_add(tf.reduce_sum(tf.cast(tf.not_equal(distance, 0), tf.float32)))
@@ -146,7 +146,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
         self.cer_accumulator.assign_add(tf.reduce_sum(distance))
 
         # Increment the batch_counter by the batch size
-        self.batch_counter.assign_add(len(y_true))
+        self.batch_counter.assign_add(input_shape[0])
 
     def result(self):
         """ Computes and returns the metric result.
@@ -253,7 +253,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
         self.wer_accumulator.assign_add(tf.reduce_sum(tf.cast(distance, tf.float32)))
 
         # Increment the batch_counter by the batch size
-        self.batch_counter.assign_add(len(y_true))
+        self.batch_counter.assign_add(input_shape[0])
 
     def result(self):
         """Computes and returns the metric result.

mltu/torch/yolo/annotation.py

Lines changed: 3 additions & 2 deletions
@@ -10,9 +10,10 @@ class VOCAnnotationReader:
     def __init__(self, labels: dict, images_path: str=None):
         self.labels = labels
         self.images_path = images_path
+        self.dataset_found_labels = {}
 
     @staticmethod
-    def readFromVOC(voc_annotation_path: str, labels: dict, images_path: str=None) -> Detections:
+    def readFromVOC(voc_annotation_path: str, labels: dict={}, images_path: str=None) -> Detections:
         annotation_path = Path(voc_annotation_path)
         tree = ET.parse(voc_annotation_path)
         root = tree.getroot()
@@ -49,7 +50,7 @@ def readFromVOC(voc_annotation_path: str, labels: dict, images_path: str=None) -> Detections:
         image_path = os.path.join(images_path, annotation_dict['filename'])
         dets = []
         for obj in annotation_dict['objects']:
-            if obj['name'] not in labels.values():
+            if labels and obj['name'] not in labels.values():
                 print(f"Label {obj['name']} not found in labels")
                 continue

mltu/torch/yolo/train_yolo.py

Lines changed: 3 additions & 11 deletions
@@ -5,7 +5,7 @@
 from mltu.annotations.images import CVImage
 from mltu.transformers import ImageResizer, ImageShowCV2, ImageNormalizer
 from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen, \
-    RandomMirror, RandomFlip, RandomGaussianBlur, RandomSaltAndPepper, RandomDropBlock, RandomMosaic
+    RandomMirror, RandomFlip, RandomGaussianBlur, RandomSaltAndPepper, RandomDropBlock, RandomMosaic, RandomElasticTransform
 from mltu.torch.model import Model
 from mltu.torch.dataProvider import DataProvider
 from mltu.torch.yolo.annotation import VOCAnnotationReader
@@ -21,6 +21,7 @@
 
 annotations_path = "Datasets/car-plate-detection/annotations"
 
+# Create a dataset from the annotations, the dataset is a list of lists where each list contains the [image path, annotation path]
 dataset = [[None, os.path.join(annotations_path, f)] for f in os.listdir(annotations_path)]
 
 # Make sure torch can see GPU device, it is not recommended to train with CPU
@@ -49,9 +50,6 @@
     numpy=False,
 )
 
-# for b in data_provider:
-#     pass
-
 # split the dataset into train and test
 train_data_provider, val_data_provider = data_provider.split(0.9, shuffle=False)
 
@@ -62,20 +60,14 @@
     RandomSharpen(),
     RandomMirror(),
    RandomFlip(),
+    RandomElasticTransform(),
     RandomGaussianBlur(),
     RandomSaltAndPepper(),
     RandomRotate(angle=10),
     RandomDropBlock(),
     RandomMosaic(),
 ]
 
-# for batch in train_data_provider:
-#     pass
-#     print(batch)
-#     break
-
-
-
 base_model = BaseModel("yolov8n.pt")
 # Create a YOLO model
 model = DetectionModel('yolov8n.yaml', nc=len(labels))

mltu/transformers.py

Lines changed: 33 additions & 5 deletions
@@ -1,6 +1,9 @@
 import cv2
+import time
+import queue
 import typing
 import logging
+import threading
 import importlib
 import numpy as np
 
@@ -344,6 +347,26 @@ def __init__(
         super(ImageShowCV2, self).__init__(log_level=log_level)
         self.verbose = verbose
         self.name = name
+        self.thread_started = False
+
+    def init_thread(self):
+        if not self.thread_started:
+            self.thread_started = True
+            self.image_queue = queue.Queue()
+
+            # Start a new thread to display the images, so that the main loop could run in multiple threads
+            self.thread = threading.Thread(target=self._display_images)
+            self.thread.start()
+
+    def _display_images(self) -> None:
+        """ Display images in a continuous loop """
+        while True:
+            image, label = self.image_queue.get()
+            if isinstance(label, Image):
+                cv2.imshow(self.name + "Label", label.numpy())
+            cv2.imshow(self.name, image.numpy())
+            cv2.waitKey(0)
+            cv2.destroyAllWindows()
 
     def __call__(self, image: Image, label: typing.Any) -> typing.Tuple[Image, typing.Any]:
         """ Show image for visual inspection
@@ -356,6 +379,9 @@ def __call__(self, image: Image, label: typing.Any) -> typing.Tuple[Image, typing.Any]:
             data (np.ndarray): Image data
             label (np.ndarray): Label data (unchanged)
         """
+        # Start cv2 image display thread
+        self.init_thread()
+
         if self.verbose:
             if isinstance(label, (str, int, float)):
                 self.logger.info(f"Label: {label}")
@@ -365,10 +391,12 @@ def __call__(self, image: Image, label: typing.Any) -> typing.Tuple[Image, typing.Any]:
             img = detection.applyToFrame(np.asarray(image.numpy()))
             image.update(img)
 
-        cv2.imshow(self.name, image.numpy())
-        if isinstance(label, Image):
-            cv2.imshow(self.name+"Label", label.numpy())
-        cv2.waitKey(0)
-        cv2.destroyAllWindows()
+        # Wait for the display thread to drain the queue
+        # (sleep while it is not empty)
+        while not self.image_queue.empty():
+            time.sleep(0.5)
+
+        # Add image to display queue
+        self.image_queue.put((image, label))
 
         return image, label
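Why the queue-and-thread pattern: OpenCV's HighGUI calls (`cv2.imshow`, `cv2.waitKey`) are only reliable when issued from a single thread, so when the data provider invokes transformers from multiple worker threads, all frames are funneled through a `queue.Queue` to one dedicated display thread. A stripped-down sketch of the same pattern (illustrative, not the mltu implementation):

```python
# Stripped-down sketch of the single-display-thread pattern used above.
import queue
import threading

import cv2
import numpy as np

frames: "queue.Queue[np.ndarray]" = queue.Queue()

def display_loop() -> None:
    # All cv2 GUI calls happen on this one thread.
    while True:
        frame = frames.get()
        cv2.imshow("preview", frame)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

threading.Thread(target=display_loop, daemon=True).start()

# Any number of worker threads can now hand frames over safely.
frames.put(np.zeros((64, 64, 3), dtype=np.uint8))
```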
