pythonlessons
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 1 deletion b/‎.gitignore‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎CHANGELOG.md‎
Lines changed: 22 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎Tutorials/01_image_to_word/inferenceModel.py‎
Lines changed: 3 additions & 3 deletions b/‎Tutorials/01_image_to_word/inferenceModel.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎Tutorials/01_image_to_word/train.py‎
Lines changed: 1 addition & 1 deletion b/‎Tutorials/01_image_to_word/train.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Tutorials/02_captcha_to_text/inferenceModel.py‎
Lines changed: 3 additions & 3 deletions b/‎Tutorials/02_captcha_to_text/inferenceModel.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎Tutorials/03_handwriting_recognition/inferenceModel.py‎
Lines changed: 3 additions & 3 deletions b/‎Tutorials/03_handwriting_recognition/inferenceModel.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎Tutorials/04_sentence_recognition/inferenceModel.py‎
Lines changed: 3 additions & 3 deletions b/‎Tutorials/04_sentence_recognition/inferenceModel.py‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎Tutorials/05_sound_to_text/inferenceModel.py‎
Lines changed: 4 additions & 4 deletions b/‎Tutorials/05_sound_to_text/inferenceModel.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎Tutorials/08_handwriting_recognition_torch/inferenceModel.py‎
Lines changed: 4 additions & 4 deletions b/‎Tutorials/08_handwriting_recognition_torch/inferenceModel.py‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎Tutorials/11_Yolov8/convert2onnx.py‎
Lines changed: 49 additions & 0 deletions b/‎Tutorials/11_Yolov8/convert2onnx.py‎
Lines changed: 49 additions & 0 deletions
@@ -13,4 +13,5 @@ dist
 .python-version
 
 test
-build
+build
+yolov8*
@@ -1,3 +1,25 @@
+## [1.2.0] - 2024-03-05
+### Changed
+- Added code to work with the Ultralytics YoloV8 Detection model (training and inference)
+- Updated previous tutorials to work with the latest mltu changes
+- Updated `mltu.augmentors.RandomRotate` to work with `Detections` objects
+- Changed to use `importlib` to import `librosa` in `mltu.preprocessors` to avoid import errors
+- Changed `mltu.torch.model.Model` object to provide more flexibility in training and validation
+- Improved `mltu.torch.callbacks` to provide more flexibility in training and validation
+
+### Added
+- Added `mltu.torch.detection` module, which contains `Detections` and `Detection` objects to handle detection annotations
+- Added `RandomDropBlock` and `RandomDropBlock` augmentors into `mltu.augmentors` to work with `Detections` objects
+- Added `ModelEMA` into `mltu.torch.model` to work with EMA (Exponential Moving Average) model
+- Added `FpsWrapper` into `mltu.inferenceModel` to automatically calculate FPS (Frames Per Second) when using inference model
+- Added `mltu.torch.yolo.detector.BaseDetector` as a base class for preprocessing and postprocessing detection models
+- Added `mltu.torch.yolo.detector.onnx_detector.Detector` as a class to handle YoloV8 onnx model detection inference
+- Added `mltu.torch.yolo.detector.torch_detector.Detector` as a class to handle YoloV8 torch model detection inference
+- Added `mltu.torch.yolo.loss.v8DetectionLoss` as a class to handle YoloV8 detection loss in training
+- Added `mltu.torch.yolo.metrics.YoloMetrics` as a class to handle YoloV8 detection metrics in training and validation
+- Added `mltu.torch.yolo.optimizer` module, which contains the `AccumulativeOptimizer` object and the `build_optimizer` function to handle the YoloV8 detection optimizer in training
+- Added YoloV8 Detection tutorial in `Tutorials.11_Yolov8` that shows how to do basic inference with torch and exported onnx models
+
 ## [1.1.8] - 2024-01-02
 ### Changed
 - Fixed `setup.py` to include `mltu.torch` and `mltu.tensorflow` packages and other packages that are required for `mltu` to work properly
 
@@ -11,11 +11,11 @@ def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
         self.char_list = char_list
 
     def predict(self, image: np.ndarray):
-        image = cv2.resize(image, self.input_shape[:2][::-1])
+        image = cv2.resize(image, self.input_shapes[0][1:3][::-1])
 
         image_pred = np.expand_dims(image, axis=0).astype(np.float32)
 
-        preds = self.model.run(None, {self.input_name: image_pred})[0]
+        preds = self.model.run(self.output_names, {self.input_names[0]: image_pred})[0]
 
         text = ctc_decoder(preds, self.char_list)[0]
 
@@ -35,7 +35,7 @@ def predict(self, image: np.ndarray):
 
     accum_cer = []
     for image_path, label in tqdm(df[:20]):
-        image = cv2.imread(image_path)
+        image = cv2.imread(image_path.replace("\\", "/"))
 
         try:
             prediction_text = model.predict(image)
 
@@ -7,10 +7,10 @@
 
 from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
 
-from mltu.dataProvider import DataProvider
 from mltu.preprocessors import ImageReader
 from mltu.annotations.images import CVImage
 from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding
+from mltu.tensorflow.dataProvider import DataProvider
 from mltu.tensorflow.losses import CTCloss
 from mltu.tensorflow.callbacks import Model2onnx, TrainLogger
 from mltu.tensorflow.metrics import CWERMetric
 
@@ -11,11 +11,11 @@ def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
         self.char_list = char_list
 
     def predict(self, image: np.ndarray):
-        image = cv2.resize(image, self.input_shape[:2][::-1])
+        image = cv2.resize(image, self.input_shapes[0][1:3][::-1])
 
         image_pred = np.expand_dims(image, axis=0).astype(np.float32)
 
-        preds = self.model.run(None, {self.input_name: image_pred})[0]
+        preds = self.model.run(self.output_names, {self.input_names[0]: image_pred})[0]
 
         text = ctc_decoder(preds, self.char_list)[0]
 
@@ -34,7 +34,7 @@ def predict(self, image: np.ndarray):
 
     accum_cer = []
     for image_path, label in tqdm(df):
-        image = cv2.imread(image_path)
+        image = cv2.imread(image_path.replace("\\", "/"))
 
         prediction_text = model.predict(image)
 
 
@@ -11,11 +11,11 @@ def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
         self.char_list = char_list
 
     def predict(self, image: np.ndarray):
-        image = cv2.resize(image, self.input_shape[:2][::-1])
+        image = cv2.resize(image, self.input_shapes[0][1:3][::-1])
 
         image_pred = np.expand_dims(image, axis=0).astype(np.float32)
 
-        preds = self.model.run(None, {self.input_name: image_pred})[0]
+        preds = self.model.run(self.output_names, {self.input_names[0]: image_pred})[0]
 
         text = ctc_decoder(preds, self.char_list)[0]
 
@@ -34,7 +34,7 @@ def predict(self, image: np.ndarray):
 
     accum_cer = []
     for image_path, label in tqdm(df):
-        image = cv2.imread(image_path)
+        image = cv2.imread(image_path.replace("\\", "/"))
 
         prediction_text = model.predict(image)
 
 
@@ -12,11 +12,11 @@ def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
         self.char_list = char_list
 
     def predict(self, image: np.ndarray):
-        image = ImageResizer.resize_maintaining_aspect_ratio(image, *self.input_shape[:2][::-1])
+        image = ImageResizer.resize_maintaining_aspect_ratio(image, *self.input_shapes[0][1:3][::-1])
 
         image_pred = np.expand_dims(image, axis=0).astype(np.float32)
 
-        preds = self.model.run(None, {self.input_name: image_pred})[0]
+        preds = self.model.run(self.output_names, {self.input_names[0]: image_pred})[0]
 
         text = ctc_decoder(preds, self.char_list)[0]
 
@@ -35,7 +35,7 @@ def predict(self, image: np.ndarray):
 
     accum_cer, accum_wer = [], []
     for image_path, label in tqdm(df):
-        image = cv2.imread(image_path)
+        image = cv2.imread(image_path.replace("\\", "/"))
 
         prediction_text = model.predict(image)
 
 
@@ -13,7 +13,7 @@ def __init__(self, char_list: typing.Union[str, list], *args, **kwargs):
     def predict(self, data: np.ndarray):
         data_pred = np.expand_dims(data, axis=0)
 
-        preds = self.model.run(None, {self.input_name: data_pred})[0]
+        preds = self.model.run(self.output_names, {self.input_names[0]: data_pred})[0]
 
         text = ctc_decoder(preds, self.char_list)[0]
 
@@ -32,13 +32,13 @@ def predict(self, data: np.ndarray):
 
     accum_cer, accum_wer = [], []
     for wav_path, label in tqdm(df):
-        
+        wav_path = wav_path.replace("\\", "/")
         spectrogram = WavReader.get_spectrogram(wav_path, frame_length=configs.frame_length, frame_step=configs.frame_step, fft_length=configs.fft_length)
-        # WavReader.plot_raw_audio(wav_path, label)
+        WavReader.plot_raw_audio(wav_path, label)
 
         padded_spectrogram = np.pad(spectrogram, ((0, configs.max_spectrogram_length - spectrogram.shape[0]),(0,0)), mode="constant", constant_values=0)
 
-        # WavReader.plot_spectrogram(spectrogram, label)
+        WavReader.plot_spectrogram(spectrogram, label)
 
         text = model.predict(padded_spectrogram)
 
 
@@ -10,13 +10,13 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
     def predict(self, image: np.ndarray):
-        image = cv2.resize(image, self.input_shape[:2][::-1])
+        image = cv2.resize(image, self.input_shapes[0][1:3][::-1])
 
         image_pred = np.expand_dims(image, axis=0).astype(np.float32)
 
-        preds = self.model.run(None, {self.input_name: image_pred})[0]
+        preds = self.model.run(self.output_names, {self.input_names[0]: image_pred})[0]
 
-        text = ctc_decoder(preds, self.vocab)[0]
+        text = ctc_decoder(preds, self.metadata["vocab"])[0]
 
         return text
 
@@ -30,7 +30,7 @@ def predict(self, image: np.ndarray):
 
     accum_cer = []
     for image_path, label in tqdm(df):
-        image = cv2.imread(image_path)
+        image = cv2.imread(image_path.replace("\\", "/"))
 
         prediction_text = model.predict(image)
 
 
@@ -0,0 +1,49 @@
+import torch
+from ultralytics.engine.model import Model as BaseModel
+
+base_model = BaseModel("yolov8m.pt")
+
+classes = base_model.names
+input_width, input_height = 640, 640
+input_shape = (1, 3, input_width, input_height)
+model = base_model.model
+
+# place model on cpu
+model.to("cpu")
+
+# set the model to inference mode
+model.eval()
+
+# convert the model to ONNX format
+dummy_input = torch.randn(input_shape).to("cpu")
+
+# Export the model
+torch.onnx.export(
+    model,               
+    dummy_input,                         
+    "yolov8m.onnx",   
+    export_params=True,        
+    input_names = ["input"],   
+    output_names = ["output"], 
+    dynamic_axes = {
+        "input": {0: "batch_size", 2: "height", 3: "width"}, 
+        "output": {0: "batch_size", 2: "anchors"}
+        }
+)
+
+# Add the class names to the model as metadata
+import onnx
+
+metadata = {"classes": classes}
+
+# Load the ONNX model
+onnx_model = onnx.load("yolov8m.onnx")
+
+# Add the metadata dictionary to the onnx model's metadata_props attribute
+for key, value in metadata.items():
+    meta = onnx_model.metadata_props.add()
+    meta.key = key
+    meta.value = str(value)
+
+# Save the modified ONNX model
+onnx.save(onnx_model, "yolov8m.onnx")
-Original file line number
+Diff line change
 .python-version
 test
 -build
 +build
 +yolov8*