
Commit effe6ff

Merge pull request #1069 from Trusted-AI/development_issue_1062
Add targeted option to RobustDPatch and fix object detection label formats
2 parents 91a7e73 + 17a18e5 commit effe6ff

8 files changed: +361 -62 lines changed


art/attacks/evasion/dpatch.py

Lines changed: 2 additions & 1 deletion
@@ -189,7 +189,7 @@ def generate( # pylint: disable=W0221

             else:

-                predictions = self.estimator.predict(x=patched_images)
+                predictions = self.estimator.predict(x=patched_images, standardise_output=True)

             for i_image in range(patched_images.shape[0]):
                 target_dict = dict()

@@ -213,6 +213,7 @@ def generate( # pylint: disable=W0221
                 gradients = self.estimator.loss_gradient(
                     x=patched_images[i_batch_start:i_batch_end],
                     y=patch_target[i_batch_start:i_batch_end],
+                    standardise_output=True,
                 )

                 for i_image in range(gradients.shape[0]):
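Note: `standardise_output=True` (added in both calls above) tells the estimator to exchange labels in the standardised, PyTorch-style object-detection format rather than its native one. A minimal sketch of that format, with made-up box and class values for illustration:

import numpy as np

# One dict per image; "boxes" holds [x1, y1, x2, y2] pixel coordinates with
# 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H (values below are illustrative only).
y_standardised = [
    {
        "boxes": np.array([[25.0, 40.0, 120.0, 200.0]], dtype=np.float32),
        "labels": np.array([3], dtype=np.int64),
        "scores": np.array([1.0], dtype=np.float32),
    }
]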

art/attacks/evasion/dpatch_robust.py

Lines changed: 101 additions & 11 deletions
@@ -59,12 +59,13 @@ class RobustDPatch(EvasionAttack):
         "learning_rate",
         "max_iter",
         "batch_size",
-        "verbose",
         "patch_location",
         "crop_range",
         "brightness_range",
         "rotation_weights",
         "sample_size",
+        "targeted",
+        "verbose",
     ]

     _estimator_requirements = (BaseEstimator, LossGradientsMixin, ObjectDetectorMixin)

@@ -81,6 +82,7 @@ def __init__(
         learning_rate: float = 5.0,
         max_iter: int = 500,
         batch_size: int = 16,
+        targeted: bool = False,
         verbose: bool = True,
     ):
         """

@@ -96,6 +98,7 @@
         :param learning_rate: The learning rate of the optimization.
         :param max_iter: The number of optimization steps.
         :param batch_size: The size of the training batch.
+        :param targeted: Indicates whether the attack is targeted (True) or untargeted (False).
         :param verbose: Show progress bars.
         """

@@ -120,9 +123,10 @@
         self.brightness_range = brightness_range
         self.rotation_weights = rotation_weights
         self.sample_size = sample_size
+        self._targeted = targeted
         self._check_params()

-    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
+    def generate(self, x: np.ndarray, y: Optional[List[Dict[str, np.ndarray]]] = None, **kwargs) -> np.ndarray:
         """
         Generate RobustDPatch.

@@ -133,7 +137,9 @@ def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
         channel_index = 1 if self.estimator.channels_first else x.ndim - 1
         if x.shape[channel_index] != self.patch_shape[channel_index - 1]:
             raise ValueError("The color channel index of the images and the patch have to be identical.")
-        if y is not None:
+        if y is None and self.targeted:
+            raise ValueError("The targeted version of RobustDPatch attack requires target labels provided to `y`.")
+        if y is not None and not self.targeted:
             raise ValueError("The RobustDPatch attack does not use target labels.")
         if x.ndim != 4:
             raise ValueError("The adversarial patch can only be applied to images.")

@@ -144,6 +150,24 @@ def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
         else:
             image_height, image_width = x.shape[1:3]

+        if not self.estimator.native_label_is_pytorch_format and y is not None:
+            from art.estimators.object_detection.utils import convert_tf_to_pt
+
+            y = convert_tf_to_pt(y=y, height=x.shape[1], width=x.shape[2])
+
+        if y is not None:
+            for i_image in range(x.shape[0]):
+                y_i = y[i_image]["boxes"]
+                for i_box in range(y_i.shape[0]):
+                    x_1, y_1, x_2, y_2 = y_i[i_box]
+                    if (
+                        x_1 < self.crop_range[1]
+                        or y_1 < self.crop_range[0]
+                        or x_2 > image_width - self.crop_range[1] + 1
+                        or y_2 > image_height - self.crop_range[0] + 1
+                    ):
+                        raise ValueError("Cropping is intersecting with at least one box, reduce `crop_range`.")
+
         if (
             self.patch_location[0] + self.patch_shape[0] > image_height - self.crop_range[0]
             or self.patch_location[1] + self.patch_shape[1] > image_width - self.crop_range[1]
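For estimators whose native labels are not in the PyTorch format, the new block above routes `y` through `convert_tf_to_pt` before any further validation. A hedged sketch of that conversion on its own, assuming TensorFlow-style labels with normalised [y1, x1, y2, x2] boxes (the exact native layout is defined in `art/estimators/object_detection/utils.py`, so treat the sample values as illustrative):

import numpy as np
from art.estimators.object_detection.utils import convert_tf_to_pt

x = np.zeros((1, 416, 416, 3), dtype=np.float32)
y_tf = [
    {
        "boxes": np.array([[0.10, 0.05, 0.50, 0.30]], dtype=np.float32),
        "labels": np.array([1], dtype=np.int64),
        "scores": np.array([1.0], dtype=np.float32),
    }
]

# After conversion, "boxes" are [x1, y1, x2, y2] in pixel coordinates of the
# given image height and width.
y_pt = convert_tf_to_pt(y=y_tf, height=x.shape[1], width=x.shape[2])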
@@ -165,14 +189,20 @@ def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
                     i_batch_start = i_batch * self.batch_size
                     i_batch_end = min((i_batch + 1) * self.batch_size, x.shape[0])

+                    if y is None:
+                        y_batch = y
+                    else:
+                        y_batch = y[i_batch_start:i_batch_end]
+
                     # Sample and apply the random transformations:
                     patched_images, patch_target, transforms = self._augment_images_with_patch(
-                        x[i_batch_start:i_batch_end], self._patch, channels_first=self.estimator.channels_first
+                        x[i_batch_start:i_batch_end], y_batch, self._patch, channels_first=self.estimator.channels_first
                     )

                     gradients = self.estimator.loss_gradient(
                         x=patched_images,
                         y=patch_target,
+                        standardise_output=True,
                     )

                     gradients = self._untransform_gradients(

@@ -187,7 +217,7 @@ def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:

             patch_gradients_old = patch_gradients

-            self._patch = self._patch + np.sign(patch_gradients) * self.learning_rate
+            self._patch = self._patch + np.sign(patch_gradients) * (1 - 2 * int(self.targeted)) * self.learning_rate

             if self.estimator.clip_values is not None:
                 self._patch = np.clip(
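The factor `(1 - 2 * int(self.targeted))` only flips the sign of the update: +1 for the untargeted attack (ascend the detection loss) and -1 for the targeted attack (descend towards the supplied target labels). Restated as a small helper for clarity (a sketch, not part of the library):

import numpy as np

def patch_update(patch: np.ndarray, patch_gradients: np.ndarray, learning_rate: float, targeted: bool) -> np.ndarray:
    # targeted=False -> factor +1 (maximise the loss); targeted=True -> factor -1 (minimise it w.r.t. the targets).
    return patch + np.sign(patch_gradients) * (1 - 2 * int(targeted)) * learning_rate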
@@ -199,12 +229,13 @@ def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
         return self._patch

     def _augment_images_with_patch(
-        self, x: np.ndarray, patch: np.ndarray, channels_first: bool
+        self, x: np.ndarray, y: Optional[List[Dict[str, np.ndarray]]], patch: np.ndarray, channels_first: bool
     ) -> Tuple[np.ndarray, List[Dict[str, np.ndarray]], Dict[str, Union[int, float]]]:
         """
         Augment images with patch.

         :param x: Sample images.
+        :param y: Target labels.
         :param patch: The patch to be applied.
         :param channels_first: Set channels first or last.
         """

@@ -242,17 +273,73 @@ def _augment_images_with_patch(

         transformations.update({"rot90": rot90})

+        if y is not None:
+
+            y_copy: List[Dict[str, np.ndarray]] = list()
+
+            for i_image in range(x_copy.shape[0]):
+                y_b = y[i_image]["boxes"].copy()
+                image_width = x.shape[2]
+                image_height = x.shape[1]
+                x_1_arr = y_b[:, 0]
+                y_1_arr = y_b[:, 1]
+                x_2_arr = y_b[:, 2]
+                y_2_arr = y_b[:, 3]
+                box_width = x_2_arr - x_1_arr
+                box_height = y_2_arr - y_1_arr
+
+                if rot90 == 0:
+                    x_1_new = x_1_arr
+                    y_1_new = y_1_arr
+                    x_2_new = x_2_arr
+                    y_2_new = y_2_arr
+
+                if rot90 == 1:
+                    x_1_new = y_1_arr
+                    y_1_new = image_width - x_1_arr - box_width
+                    x_2_new = y_1_arr + box_height
+                    y_2_new = image_width - x_1_arr
+
+                if rot90 == 2:
+                    x_1_new = image_width - x_2_arr
+                    y_1_new = image_height - y_2_arr
+                    x_2_new = x_1_new + box_width
+                    y_2_new = y_1_new + box_height
+
+                if rot90 == 3:
+                    x_1_new = image_height - y_1_arr - box_height
+                    y_1_new = x_1_arr
+                    x_2_new = image_height - y_1_arr
+                    y_2_new = x_1_arr + box_width
+
+                y_i = dict()
+                y_i["boxes"] = np.zeros_like(y[i_image]["boxes"])
+                y_i["boxes"][:, 0] = x_1_new
+                y_i["boxes"][:, 1] = y_1_new
+                y_i["boxes"][:, 2] = x_2_new
+                y_i["boxes"][:, 3] = y_2_new
+
+                y_i["labels"] = y[i_image]["labels"]
+                y_i["scores"] = y[i_image]["scores"]
+
+                y_copy.append(y_i)
+
         # 3) adjust brightness:
         brightness = random.uniform(*self.brightness_range)
-        x_copy = np.round(brightness * x_copy)
-        x_patch = np.round(brightness * x_patch)
+        x_copy = np.round(brightness * x_copy / self.learning_rate) * self.learning_rate
+        x_patch = np.round(brightness * x_patch / self.learning_rate) * self.learning_rate

         transformations.update({"brightness": brightness})

         logger.debug("Transformations: %s", str(transformations))

         patch_target: List[Dict[str, np.ndarray]] = list()
-        predictions = self.estimator.predict(x=x_copy)
+
+        if self.targeted:
+            predictions = y_copy
+        else:
+            predictions = self.estimator.predict(x=x_copy, standardise_output=True)
+
         for i_image in range(x_copy.shape[0]):
             target_dict = dict()
             target_dict["boxes"] = predictions[i_image]["boxes"]
@@ -385,8 +472,8 @@ def _check_params(self) -> None:
         if len(self.brightness_range) != 2:
             raise ValueError("The length of brightness range must be 2.")

-        if self.brightness_range[0] < 0.0 or self.brightness_range[1] > 1.0:
-            raise ValueError("The brightness range must be between 0.0 and 1.0.")
+        if self.brightness_range[0] < 0.0:
+            raise ValueError("The brightness range must be >= 0.0.")

         if self.brightness_range[0] > self.brightness_range[1]:
             raise ValueError("The first element of the brightness range must be less or equal to the second one.")

@@ -408,3 +495,6 @@ def _check_params(self) -> None:
             raise ValueError("The EOT sample size must be of type int.")
         if self.sample_size <= 0:
             raise ValueError("The EOT sample size must be greater than 0.")
+
+        if not isinstance(self.targeted, bool):
+            raise ValueError("The argument `targeted` has to be of type bool.")

art/estimators/object_detection/object_detector.py

Lines changed: 9 additions & 1 deletion
@@ -19,7 +19,7 @@
 This module implements mixin abstract base class for all object detectors in ART.
 """

-from abc import ABC
+from abc import ABC, abstractmethod

 from art.estimators.estimator import BaseEstimator
 from art.estimators.classification.classifier import LossGradientsMixin

@@ -30,6 +30,14 @@ class ObjectDetectorMixin(ABC):
     Mix-in Base class for ART object detectors.
     """

+    @property
+    @abstractmethod
+    def native_label_is_pytorch_format(self) -> bool:
+        """
+        Are the native labels in PyTorch format [x1, y1, x2, y2]?
+        """
+        raise NotImplementedError
+

 class ObjectDetector(ObjectDetectorMixin, LossGradientsMixin, BaseEstimator, ABC):
     """

art/estimators/object_detection/pytorch_faster_rcnn.py

Lines changed: 16 additions & 20 deletions
@@ -131,6 +131,13 @@ def __init__(
         self._model.eval()
         self.attack_losses: Tuple[str, ...] = attack_losses

+    @property
+    def native_label_is_pytorch_format(self) -> bool:
+        """
+        Are the native labels in PyTorch format [x1, y1, x2, y2]?
+        """
+        return True
+
     @property
     def input_shape(self) -> Tuple[int, ...]:
         """

@@ -156,13 +163,12 @@ def loss_gradient(
         Compute the gradient of the loss function w.r.t. `x`.

         :param x: Samples of shape (nb_samples, height, width, nb_channels).
-        :param y: Target values of format `List[Dict[Tensor]]`, one for each input image. The
-                  fields of the Dict are as follows:
+        :param y: Target values of format `List[Dict[Tensor]]`, one for each input image. The fields of the Dict are as
+                  follows:

-                  - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values \
-                    between 0 and H and 0 and W
-                  - labels (Int64Tensor[N]): the predicted labels for each image
-                  - scores (Tensor[N]): the scores or each prediction.
+                  - boxes (FloatTensor[N, 4]): the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and
+                    0 <= y1 < y2 <= H.
+                  - labels (Int64Tensor[N]): the labels for each image
         :return: Loss gradients of the same shape as `x`.
         """
         import torch  # lgtm [py/repeated-import]

@@ -181,7 +187,6 @@
                 y_t = dict()
                 y_t["boxes"] = torch.from_numpy(y_i["boxes"]).type(torch.float).to(self._device)
                 y_t["labels"] = torch.from_numpy(y_i["labels"]).type(torch.int64).to(self._device)
-                y_t["scores"] = torch.from_numpy(y_i["scores"]).to(self._device)
                 y_tensor.append(y_t)
         else:
             y_tensor = y

@@ -215,7 +220,6 @@
                 y_preprocessed_t = dict()
                 y_preprocessed_t["boxes"] = torch.from_numpy(y_i["boxes"]).type(torch.float).to(self._device)
                 y_preprocessed_t["labels"] = torch.from_numpy(y_i["labels"]).type(torch.int64).to(self._device)
-                y_preprocessed_t["scores"] = torch.from_numpy(y_i["scores"]).to(self._device)
                 y_preprocessed_tensor.append(y_preprocessed_t)
             y_preprocessed = y_preprocessed_tensor

@@ -286,12 +290,11 @@ def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> List[Dict[str, np.ndarray]]:

         :param x: Samples of shape (nb_samples, height, width, nb_channels).
         :param batch_size: Batch size.
-        :return: Predictions of format `List[Dict[str, np.ndarray]]`, one for each input image. The
-                 fields of the Dict are as follows:
+        :return: Predictions of format `List[Dict[str, np.ndarray]]`, one for each input image. The fields of the Dict
+                 are as follows:

-                 - boxes [N, 4]: the predicted boxes in [x1, y1, x2, y2] format, with values \
-                   between 0 and H and 0 and W
-                 - labels [N]: the predicted labels for each image
+                 - boxes [N, 4]: the boxes in [x1, y1, x2, y2] format, with 0 <= x1 < x2 <= W and 0 <= y1 < y2 <= H.
+                 - labels [N]: the labels for each image
                  - scores [N]: the scores or each prediction.
         """
         import torchvision  # lgtm [py/repeated-import]

@@ -330,12 +333,5 @@
     def compute_loss(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
         """
         Compute the loss of the neural network for samples `x`.
-
-        :param x: Samples of shape (nb_samples, nb_features) or (nb_samples, nb_pixels_1, nb_pixels_2,
-                  nb_channels) or (nb_samples, nb_channels, nb_pixels_1, nb_pixels_2).
-        :param y: Target values (class labels) one-hot-encoded of shape `(nb_samples, nb_classes)` or indices
-                  of shape `(nb_samples,)`.
-        :return: Loss values.
-        :rtype: Format as expected by the `model`
         """
         raise NotImplementedError
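With the `scores` conversion removed above, targets passed to `PyTorchFasterRCNN.loss_gradient` only need `boxes` and `labels`. A minimal sketch with illustrative values:

import numpy as np
from art.estimators.object_detection import PyTorchFasterRCNN

frcnn = PyTorchFasterRCNN(clip_values=(0, 255))

x = np.random.randint(0, 255, size=(1, 416, 416, 3)).astype(np.float32)
y = [
    {
        "boxes": np.array([[30.0, 60.0, 200.0, 300.0]], dtype=np.float32),  # [x1, y1, x2, y2]
        "labels": np.array([2], dtype=np.int64),
    }
]

grads = frcnn.loss_gradient(x=x, y=y)  # same shape as x
preds = frcnn.predict(x=x)             # list of dicts with boxes, labels, and scores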
