Merge pull request #2675 from arjun-sachar/dev_1.20.0

beat-buesser · web-flow · commit 6c41ca8cb191 · 2025-06-30T11:04:03.000+02:00
Implementing Yolo  v8+ Dependencies
diff --git a/art/estimators/object_detection/pytorch_yolo.py b/art/estimators/object_detection/pytorch_yolo.py
@@ -16,7 +16,7 @@
 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 """
-This module implements the task specific estimator for PyTorch YOLO v3 and v5 object detectors.
+This module implements the task specific estimator for PyTorch YOLO v3, v5, v8+ object detectors.
 
 | Paper link: https://arxiv.org/abs/1804.02767
 """
@@ -42,7 +42,7 @@
 
 class PyTorchYolo(PyTorchObjectDetector):
     """
-    This module implements the model- and task specific estimator for YOLO v3, v5 object detector models in PyTorch.
+    This module implements the model- and task specific estimator for YOLO object detector models in PyTorch.
 
     | Paper link: https://arxiv.org/abs/1804.02767
     """
@@ -65,11 +65,12 @@ def __init__(
         ),
         device_type: str = "gpu",
         is_yolov8: bool = False,
+        model_name: str | None = None,
     ):
         """
         Initialization.
 
-        :param model: YOLO v3 or v5 model wrapped as demonstrated in examples/get_started_yolo.py.
+        :param model: YOLO v3, v5, or v8+ model wrapped as demonstrated in examples/get_started_yolo.py.
                       The output of the model is `list[dict[str, torch.Tensor]]`, one for each input image.
                       The fields of the dict are as follows:
 
@@ -93,8 +94,15 @@ def __init__(
                               'loss_objectness', and 'loss_rpn_box_reg'.
         :param device_type: Type of device to be used for model and tensors, if `cpu` run on CPU, if `gpu` run on GPU
                             if available otherwise run on CPU.
-        :param is_yolov8: The flag to be used for marking the YOLOv8 model.
+        :param is_yolov8: The flag to be used for marking the YOLOv8+ model.
+        :param model_name: The name of the model (e.g., 'yolov8n', 'yolov10n') for determining loss function.
         """
+        # Wrap the model with PyTorchYoloLossWrapper if it's a YOLO v8+ model
+        if is_yolov8:
+            from art.estimators.object_detection.pytorch_yolo_loss_wrapper import PyTorchYoloLossWrapper
+
+            model = PyTorchYoloLossWrapper(model, model_name)
+
         super().__init__(
             model=model,
             input_shape=input_shape,
@@ -154,20 +162,31 @@ def _translate_predictions(self, predictions: "torch.Tensor") -> list[dict[str,
         Translate object detection predictions from the model format (YOLO) to ART format (torchvision) and
         convert tensors to numpy arrays.
 
-        :param predictions: Object detection labels in format xcycwh (YOLO).
+        :param predictions: Object detection labels in format xcycwh (YOLO) or list of dicts (YOLO v8+).
         :return: Object detection labels in format x1y1x2y2 (torchvision).
         """
         import torch
 
+        predictions_x1y1x2y2: list[dict[str, np.ndarray]] = []
+
+        # Handle YOLO v8+ predictions (list of dicts)
+        if isinstance(predictions, list) and len(predictions) > 0 and isinstance(predictions[0], dict):
+            for pred in predictions:
+                prediction = {}
+                prediction["boxes"] = pred["boxes"].detach().cpu().numpy()
+                prediction["labels"] = pred["labels"].detach().cpu().numpy()
+                prediction["scores"] = pred["scores"].detach().cpu().numpy()
+                predictions_x1y1x2y2.append(prediction)
+            return predictions_x1y1x2y2
+
+        # Handle traditional YOLO predictions (tensor format)
         if self.channels_first:
             height = self.input_shape[1]
             width = self.input_shape[2]
         else:
             height = self.input_shape[0]
             width = self.input_shape[1]
 
-        predictions_x1y1x2y2: list[dict[str, np.ndarray]] = []
-
         for pred in predictions:
             boxes = torch.vstack(
                 [
diff --git a/art/estimators/object_detection/pytorch_yolo_loss_wrapper.py b/art/estimators/object_detection/pytorch_yolo_loss_wrapper.py
@@ -0,0 +1,77 @@
+# MIT License
+#
+# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2025
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""
+PyTorch-specific loss wrapper for YOLO v8 and later models in ART.
+"""
+
+import torch
+
+
+class PyTorchYoloLossWrapper(torch.nn.Module):
+    """Wrapper for YOLO v8+ models: returns a loss dict in training mode and prediction dicts otherwise."""
+
+    def __init__(self, model, name):
+        """
+        :param model: YOLO v8+ detection model; `args` and `criterion` attributes are attached to it here.
+        :param name: Model name, e.g. 'yolov8n'; a name containing 'v10' selects `E2EDetectLoss`, anything
+                     else (including `None`) selects `v8DetectionLoss`.
+        :raises ImportError: If the `ultralytics` package cannot be imported.
+        """
+        super().__init__()
+        self.model = model
+        try:  # guard only the imports so unrelated failures are not reported as a missing package
+            from ultralytics.models.yolo.detect import DetectionPredictor
+            from ultralytics.utils.loss import v8DetectionLoss, E2EDetectLoss
+        except ImportError as e:
+            raise ImportError("The 'ultralytics' package is required for YOLO v8+ models but not installed.") from e
+        self.detection_predictor = DetectionPredictor()
+        self.model.args = self.detection_predictor.args
+        # `name` may be None (the estimator's `model_name` defaults to None), so guard the substring test.
+        use_e2e_loss = name is not None and "v10" in name
+        self.model.criterion = E2EDetectLoss(model) if use_e2e_loss else v8DetectionLoss(model)
+
+    def forward(self, x, targets=None):
+        """
+        :param x: Batch of images; `x.shape[2]` is the divisor used to normalise the target boxes.
+        :param targets: Per-image dicts with 'boxes' and 'labels' tensors; `ValueError` if `None` in training mode.
+        :return: Dict of summed losses in training mode, otherwise a list of per-image prediction dicts.
+        """
+        if self.training:
+            if targets is None:
+                raise ValueError("`targets` must be provided when the wrapper is in training mode.")
+            cls = torch.cat([item["labels"] for item in targets]).type(torch.float32)
+            indices = [i for i, item in enumerate(targets) for _ in range(len(item["labels"]))]
+            # Create the batch indices on the labels' device to avoid mixing CPU and GPU tensors in one batch.
+            batch_idx = torch.tensor(indices, device=cls.device)
+            # NOTE(review): every coordinate is divided by x.shape[2]; confirm inputs are square.
+            bboxes = torch.cat([item["boxes"] for item in targets]) / x.shape[2]
+            loss, loss_components = self.model.loss({"bboxes": bboxes, "cls": cls, "batch_idx": batch_idx, "img": x})
+            return {
+                "loss_total": loss.sum(),
+                "loss_box": loss_components[0].sum(),
+                "loss_cls": loss_components[1].sum(),
+                "loss_dfl": loss_components[2].sum(),
+            }
+        preds = self.model(x)
+        self.detection_predictor.model = self.model
+        self.detection_predictor.batch = [x]
+        results = self.detection_predictor.postprocess(preds, x, x)
+        return [
+            {"boxes": res.boxes.xyxy, "scores": res.boxes.conf, "labels": res.boxes.cls.type(torch.int)}
+            for res in results
+        ]
diff --git a/requirements_test.txt b/requirements_test.txt
@@ -35,6 +35,9 @@ torchvision==0.22.1
 # PyTorch image transformers
 timm==1.0.15
 
+# YOLO dependencies
+ultralytics==8.3.159
+
 catboost==1.2.8
 GPy==1.13.2
 lightgbm==4.6.0
diff --git a/tests/estimators/object_detection/test_pytorch_yolo.py b/tests/estimators/object_detection/test_pytorch_yolo.py
@@ -322,3 +322,39 @@ def test_patch(art_warning, get_pytorch_yolo):
 
     except ARTTestException as e:
         art_warning(e)
+
+
+def test_import_pytorch_yolo_loss_wrapper():
+    """
+    Check that PyTorchYoloLossWrapper can be constructed when `ultralytics` is replaced by lightweight stubs.
+    """
+    import sys
+    import types
+    from unittest import mock
+
+    import torch
+    from art.estimators.object_detection.pytorch_yolo_loss_wrapper import PyTorchYoloLossWrapper
+
+    class DummyModel(torch.nn.Module):
+        def loss(self, items):
+            return (torch.tensor([1.0]), [torch.tensor(1.0), torch.tensor(2.0), torch.tensor(3.0)])
+
+    # Stand-ins for the only ultralytics names the wrapper imports.
+    detect_stub = types.SimpleNamespace(DetectionPredictor=lambda: types.SimpleNamespace(args=None))
+    loss_stub = types.SimpleNamespace(v8DetectionLoss=lambda m: None, E2EDetectLoss=lambda m: None)
+    yolo_stub = types.SimpleNamespace(detect=detect_stub)
+    models_stub = types.SimpleNamespace(yolo=yolo_stub)
+    utils_stub = types.SimpleNamespace(loss=loss_stub)
+    stubs = {
+        "ultralytics": types.SimpleNamespace(models=models_stub, utils=utils_stub),
+        "ultralytics.models": models_stub,
+        "ultralytics.models.yolo": yolo_stub,
+        "ultralytics.models.yolo.detect": detect_stub,
+        "ultralytics.utils": utils_stub,
+        "ultralytics.utils.loss": loss_stub,
+    }
+
+    # patch.dict restores sys.modules on exit, so the stubs cannot leak into tests that need the real package.
+    with mock.patch.dict(sys.modules, stubs):
+        wrapper = PyTorchYoloLossWrapper(DummyModel(), name="yolov8n")
+    assert isinstance(wrapper, PyTorchYoloLossWrapper)
diff --git a/tests/estimators/object_detection/test_pytorch_yolo_loss_wrapper.py b/tests/estimators/object_detection/test_pytorch_yolo_loss_wrapper.py