Skip to content

Commit 5476179

Browse files
committed
Merge branch 'develop'
2 parents 1d55cc4 + 0f13f28 commit 5476179

37 files changed

+1956
-167
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ dist
1313
.python-version
1414

1515
test
16-
build
16+
build
17+
yolov8*

CHANGELOG.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,25 @@
1+
## [1.2.0] - 2024-03-05
2+
### Changed
3+
- Created code to work with the Ultralytics YoloV8 Detection model (training and inference)
4+
- Updated previous tutorials to work with the latest mltu changes
5+
- Updated `mltu.augmentors.RandomRotate` to work with `Detections` objects
6+
- Changed to use `importlib` to import `librosa` in `mltu.preprocessors` to avoid import errors
7+
- Changed `mltu.torch.model.Model` object to provide more flexibility in training and validation
8+
- Improved `mltu.torch.callbacks` to provide more flexibility in training and validation
9+
10+
### Added
11+
- Added the `mltu.torch.detection` module, which contains the `Detections` and `Detection` objects, to handle detection annotations
12+
- Added `RandomDropBlock` and `RandomDropBlock` augmentors into `mltu.augmentors` to work with `Detections` objects
13+
- Added `ModelEMA` into `mltu.torch.model` to work with EMA (Exponential Moving Average) model
14+
- Added `FpsWrapper` into `mltu.inferenceModel` to automatically calculate FPS (Frames Per Second) when using inference model
15+
- Added `mltu.torch.yolo.detector.BaseDetector` as a base class for preprocessing and postprocessing detection models
16+
- Added `mltu.torch.yolo.detector.onnx_detector.Detector` as a class to handle YoloV8 onnx model detection inference
17+
- Added `mltu.torch.yolo.detector.torch_detector.Detector` as a class to handle YoloV8 torch model detection inference
18+
- Added `mltu.torch.yolo.loss.v8DetectionLoss` as a class to handle YoloV8 detection loss in training
19+
- Added `mltu.torch.yolo.metrics.YoloMetrics` as a class to handle YoloV8 detection metrics in training and validation
20+
- Added the `mltu.torch.yolo.optimizer` module, which contains the `AccumulativeOptimizer` object and the `build_optimizer` function, to handle the YoloV8 detection optimizer in training
21+
- Added YoloV8 Detection tutorial in `Tutorials.11_yolov8` that shows how to do basic inference with torch and exported onnx models
22+
123
## [1.1.8] - 2024-01-02
224
### Changed
325
- Fixed `setup.py` to include `mltu.torch` and `mltu.tensorflow` packages and other packages that are required for `mltu` to work properly

Tutorials/01_image_to_word/inferenceModel.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
1111
self.char_list = char_list
1212

1313
def predict(self, image: np.ndarray):
14-
image = cv2.resize(image, self.input_shape[:2][::-1])
14+
image = cv2.resize(image, self.input_shapes[0][1:3][::-1])
1515

1616
image_pred = np.expand_dims(image, axis=0).astype(np.float32)
1717

18-
preds = self.model.run(None, {self.input_name: image_pred})[0]
18+
preds = self.model.run(self.output_names, {self.input_names[0]: image_pred})[0]
1919

2020
text = ctc_decoder(preds, self.char_list)[0]
2121

@@ -35,7 +35,7 @@ def predict(self, image: np.ndarray):
3535

3636
accum_cer = []
3737
for image_path, label in tqdm(df[:20]):
38-
image = cv2.imread(image_path)
38+
image = cv2.imread(image_path.replace("\\", "/"))
3939

4040
try:
4141
prediction_text = model.predict(image)

Tutorials/01_image_to_word/train.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77

88
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
99

10-
from mltu.dataProvider import DataProvider
1110
from mltu.preprocessors import ImageReader
1211
from mltu.annotations.images import CVImage
1312
from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding
13+
from mltu.tensorflow.dataProvider import DataProvider
1414
from mltu.tensorflow.losses import CTCloss
1515
from mltu.tensorflow.callbacks import Model2onnx, TrainLogger
1616
from mltu.tensorflow.metrics import CWERMetric

Tutorials/02_captcha_to_text/inferenceModel.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
1111
self.char_list = char_list
1212

1313
def predict(self, image: np.ndarray):
14-
image = cv2.resize(image, self.input_shape[:2][::-1])
14+
image = cv2.resize(image, self.input_shapes[0][1:3][::-1])
1515

1616
image_pred = np.expand_dims(image, axis=0).astype(np.float32)
1717

18-
preds = self.model.run(None, {self.input_name: image_pred})[0]
18+
preds = self.model.run(self.output_names, {self.input_names[0]: image_pred})[0]
1919

2020
text = ctc_decoder(preds, self.char_list)[0]
2121

@@ -34,7 +34,7 @@ def predict(self, image: np.ndarray):
3434

3535
accum_cer = []
3636
for image_path, label in tqdm(df):
37-
image = cv2.imread(image_path)
37+
image = cv2.imread(image_path.replace("\\", "/"))
3838

3939
prediction_text = model.predict(image)
4040

Tutorials/03_handwriting_recognition/inferenceModel.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,11 @@ def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
1111
self.char_list = char_list
1212

1313
def predict(self, image: np.ndarray):
14-
image = cv2.resize(image, self.input_shape[:2][::-1])
14+
image = cv2.resize(image, self.input_shapes[0][1:3][::-1])
1515

1616
image_pred = np.expand_dims(image, axis=0).astype(np.float32)
1717

18-
preds = self.model.run(None, {self.input_name: image_pred})[0]
18+
preds = self.model.run(self.output_names, {self.input_names[0]: image_pred})[0]
1919

2020
text = ctc_decoder(preds, self.char_list)[0]
2121

@@ -34,7 +34,7 @@ def predict(self, image: np.ndarray):
3434

3535
accum_cer = []
3636
for image_path, label in tqdm(df):
37-
image = cv2.imread(image_path)
37+
image = cv2.imread(image_path.replace("\\", "/"))
3838

3939
prediction_text = model.predict(image)
4040

Tutorials/04_sentence_recognition/inferenceModel.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
1212
self.char_list = char_list
1313

1414
def predict(self, image: np.ndarray):
15-
image = ImageResizer.resize_maintaining_aspect_ratio(image, *self.input_shape[:2][::-1])
15+
image = ImageResizer.resize_maintaining_aspect_ratio(image, *self.input_shapes[0][1:3][::-1])
1616

1717
image_pred = np.expand_dims(image, axis=0).astype(np.float32)
1818

19-
preds = self.model.run(None, {self.input_name: image_pred})[0]
19+
preds = self.model.run(self.output_names, {self.input_names[0]: image_pred})[0]
2020

2121
text = ctc_decoder(preds, self.char_list)[0]
2222

@@ -35,7 +35,7 @@ def predict(self, image: np.ndarray):
3535

3636
accum_cer, accum_wer = [], []
3737
for image_path, label in tqdm(df):
38-
image = cv2.imread(image_path)
38+
image = cv2.imread(image_path.replace("\\", "/"))
3939

4040
prediction_text = model.predict(image)
4141

Tutorials/05_sound_to_text/inferenceModel.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
1313
def predict(self, data: np.ndarray):
1414
data_pred = np.expand_dims(data, axis=0)
1515

16-
preds = self.model.run(None, {self.input_name: data_pred})[0]
16+
preds = self.model.run(self.output_names, {self.input_names[0]: data_pred})[0]
1717

1818
text = ctc_decoder(preds, self.char_list)[0]
1919

@@ -32,13 +32,13 @@ def predict(self, data: np.ndarray):
3232

3333
accum_cer, accum_wer = [], []
3434
for wav_path, label in tqdm(df):
35-
35+
wav_path = wav_path.replace("\\", "/")
3636
spectrogram = WavReader.get_spectrogram(wav_path, frame_length=configs.frame_length, frame_step=configs.frame_step, fft_length=configs.fft_length)
37-
# WavReader.plot_raw_audio(wav_path, label)
37+
WavReader.plot_raw_audio(wav_path, label)
3838

3939
padded_spectrogram = np.pad(spectrogram, ((0, configs.max_spectrogram_length - spectrogram.shape[0]),(0,0)), mode="constant", constant_values=0)
4040

41-
# WavReader.plot_spectrogram(spectrogram, label)
41+
WavReader.plot_spectrogram(spectrogram, label)
4242

4343
text = model.predict(padded_spectrogram)
4444

Tutorials/08_handwriting_recognition_torch/inferenceModel.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ def __init__(self, *args, **kwargs):
1010
super().__init__(*args, **kwargs)
1111

1212
def predict(self, image: np.ndarray):
13-
image = cv2.resize(image, self.input_shape[:2][::-1])
13+
image = cv2.resize(image, self.input_shapes[0][1:3][::-1])
1414

1515
image_pred = np.expand_dims(image, axis=0).astype(np.float32)
1616

17-
preds = self.model.run(None, {self.input_name: image_pred})[0]
17+
preds = self.model.run(self.output_names, {self.input_names[0]: image_pred})[0]
1818

19-
text = ctc_decoder(preds, self.vocab)[0]
19+
text = ctc_decoder(preds, self.metadata["vocab"])[0]
2020

2121
return text
2222

@@ -30,7 +30,7 @@ def predict(self, image: np.ndarray):
3030

3131
accum_cer = []
3232
for image_path, label in tqdm(df):
33-
image = cv2.imread(image_path)
33+
image = cv2.imread(image_path.replace("\\", "/"))
3434

3535
prediction_text = model.predict(image)
3636

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
"""Export the Ultralytics ``yolov8m.pt`` model to ONNX and attach its class names.

The script loads the model, traces it on the CPU with a random dummy input,
writes ``yolov8m.onnx`` and then re-opens that file to store the class names
in the ONNX ``metadata_props`` so they travel together with the weights.
"""
import onnx
import torch
from ultralytics.engine.model import Model as BaseModel

# Single place for the exported file name; it is written by the export step
# and then read back and overwritten by the metadata step below.
onnx_model_path = "yolov8m.onnx"

base_model = BaseModel("yolov8m.pt")

classes = base_model.names
input_width, input_height = 640, 640
# The dummy input is laid out as (batch, channels, height, width), matching the
# "height"/"width" labels given to axes 2 and 3 in ``dynamic_axes`` below.
input_shape = (1, 3, input_height, input_width)
model = base_model.model

# place model on cpu
model.to("cpu")

# set the model to inference mode
model.eval()

# dummy input used to trace the model during export
dummy_input = torch.randn(input_shape).to("cpu")

# Export the model
torch.onnx.export(
    model,
    dummy_input,
    onnx_model_path,
    export_params=True,
    input_names=["input"],
    output_names=["output"],
    # Keep batch size and spatial size of the input, and the batch size and
    # axis 2 ("anchors") of the output, dynamic in the exported graph.
    dynamic_axes={
        "input": {0: "batch_size", 2: "height", 3: "width"},
        "output": {0: "batch_size", 2: "anchors"},
    },
)

# Add the class names to the model as metadata
metadata = {"classes": classes}

# Load the ONNX model
onnx_model = onnx.load(onnx_model_path)

# Add the metadata dictionary to the onnx model's metadata_props attribute;
# every value is stored as its ``str()`` representation.
for key, value in metadata.items():
    meta = onnx_model.metadata_props.add()
    meta.key = key
    meta.value = str(value)

# Save the modified ONNX model (overwrites the file exported above)
onnx.save(onnx_model, onnx_model_path)

0 commit comments

Comments
 (0)