
Commit 089b8d0

Merge pull request #599 from Trusted-AI/development_dpatch

Add targeted version of DPatch

2 parents 79cce25 + f399663

3 files changed: 96 additions & 23 deletions


art/attacks/evasion/dpatch.py

Lines changed: 55 additions & 16 deletions
@@ -23,7 +23,7 @@
 import logging
 import math
 import random
-from typing import Dict, List, Optional, Tuple, TYPE_CHECKING
+from typing import Dict, List, Optional, Tuple, Union, TYPE_CHECKING
 
 import numpy as np
 from tqdm import trange
@@ -78,15 +78,26 @@ def __init__(
         self.learning_rate = learning_rate
         self.max_iter = max_iter
         self.batch_size = batch_size
-        self._patch = np.ones(shape=patch_shape) * (self.estimator.clip_values[1] + self.estimator.clip_values[0]) / 2.0
+        self._patch = np.random.randint(
+            self.estimator.clip_values[0], self.estimator.clip_values[1], size=patch_shape
+        ).astype(np.float32)
         self._check_params()
 
-    def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
+        self.target_label = []
+
+    def generate(
+        self,
+        x: np.ndarray,
+        y: Optional[np.ndarray] = None,
+        target_label: Optional[Union[int, List[int], np.ndarray]] = None,
+        **kwargs
+    ) -> np.ndarray:
         """
         Generate DPatch.
 
         :param x: Sample images.
         :param y: Target labels for object detector.
+        :param target_label: The target label of the DPatch attack.
         :return: Adversarial patch.
         """
         channel_index = 1 if self.estimator.channels_first else x.ndim - 1
@@ -96,6 +107,17 @@ def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
             raise ValueError("The DPatch attack does not use target labels.")
         if x.ndim != 4:
             raise ValueError("The adversarial patch can only be applied to images.")
+        if target_label is not None:
+            if isinstance(target_label, int):
+                self.target_label = [target_label] * x.shape[0]
+            elif isinstance(target_label, np.ndarray):
+                if not (target_label.shape == (x.shape[0], 1) or target_label.shape == (x.shape[0],)):
+                    raise ValueError("The target_label has to be a 1-dimensional array.")
+                self.target_label = target_label.tolist()
+            else:
+                if not len(target_label) == x.shape[0] or not isinstance(target_label, list):
+                    raise ValueError("The target_label as list of integers needs to be of length equal to the number of images in `x`.")
+                self.target_label = target_label
 
         for i_step in trange(self.max_iter, desc="DPatch iteration"):
             if i_step == 0 or (i_step + 1) % 100 == 0:
@@ -106,19 +128,32 @@ def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
             )
             patch_target: List[Dict[str, np.ndarray]] = list()
 
-            for i_image in range(patched_images.shape[0]):
+            if self.target_label:
+
+                for i_image in range(patched_images.shape[0]):
+                    i_x_1 = transforms[i_image]["i_x_1"]
+                    i_x_2 = transforms[i_image]["i_x_2"]
+                    i_y_1 = transforms[i_image]["i_y_1"]
+                    i_y_2 = transforms[i_image]["i_y_2"]
 
-                i_x_1 = transforms[i_image]["i_x_1"]
-                i_x_2 = transforms[i_image]["i_x_2"]
-                i_y_1 = transforms[i_image]["i_y_1"]
-                i_y_2 = transforms[i_image]["i_y_2"]
+                    target_dict = dict()
+                    target_dict["boxes"] = np.asarray([[i_x_1, i_y_1, i_x_2, i_y_2]])
+                    target_dict["labels"] = np.asarray([self.target_label[i_image],])
+                    target_dict["scores"] = np.asarray([1.0,])
 
-                target_dict = dict()
-                target_dict["boxes"] = np.asarray([[i_x_1, i_y_1, i_x_2, i_y_2]])
-                target_dict["labels"] = np.asarray([1,])
-                target_dict["scores"] = np.asarray([1.0,])
+                    patch_target.append(target_dict)
 
-                patch_target.append(target_dict)
+            else:
+
+                predictions = self.estimator.predict(x=patched_images)
+
+                for i_image in range(patched_images.shape[0]):
+                    target_dict = dict()
+                    target_dict["boxes"] = predictions[i_image]["boxes"].detach().cpu().numpy()
+                    target_dict["labels"] = predictions[i_image]["labels"].detach().cpu().numpy()
+                    target_dict["scores"] = predictions[i_image]["scores"].detach().cpu().numpy()
+
+                    patch_target.append(target_dict)
 
             num_batches = math.ceil(x.shape[0] / self.batch_size)
             patch_gradients = np.zeros_like(self._patch)
@@ -131,7 +166,7 @@ def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
                     x=patched_images[i_batch_start:i_batch_end], y=patch_target[i_batch_start:i_batch_end],
                 )
 
-                for i_image in range(self.batch_size):
+                for i_image in range(patched_images.shape[0]):
 
                     i_x_1 = transforms[i_batch_start + i_image]["i_x_1"]
                     i_x_2 = transforms[i_batch_start + i_image]["i_x_2"]
@@ -143,9 +178,13 @@ def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
                     else:
                         patch_gradients_i = gradients[i_image, i_x_1:i_x_2, i_y_1:i_y_2, :]
 
-                    patch_gradients += patch_gradients_i
+                    patch_gradients = patch_gradients + patch_gradients_i
+
+            if self.target_label:
+                self._patch = self._patch - np.sign(patch_gradients) * self.learning_rate
+            else:
+                self._patch = self._patch + np.sign(patch_gradients) * self.learning_rate
 
-            self._patch -= patch_gradients * self.learning_rate
             self._patch = np.clip(
                 self._patch, a_min=self.estimator.clip_values[0], a_max=self.estimator.clip_values[1],
             )
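
Taken together, the dpatch.py changes give `generate` two modes: with `target_label` set, the patch target is that label inside the patch box and the patch follows sign-gradient descent; without it, the targets are the detector's own predictions and the patch ascends the gradient instead. A minimal usage sketch, assuming a COCO-pretrained `PyTorchFasterRCNN` wrapper and illustrative image sizes, labels, and hyperparameters (none of this setup appears in the commit):

import numpy as np

from art.attacks.evasion import DPatch
from art.estimators.object_detection import PyTorchFasterRCNN

# Wrap a pretrained torchvision Faster R-CNN as an ART object detector.
detector = PyTorchFasterRCNN(clip_values=(0, 255))

# Two sample images in NHWC format; sizes and pixel values are illustrative.
x = np.random.uniform(low=0, high=255, size=(2, 640, 640, 3)).astype(np.float32)

attack = DPatch(detector, patch_shape=(40, 40, 3), learning_rate=1.0, max_iter=100, batch_size=1)

# Targeted mode: push detections inside the patch region towards label 1,
# using the new sign-gradient descent update.
patch_targeted = attack.generate(x=x, target_label=1)

# Untargeted mode: patch targets come from the detector's own predictions
# and the update ascends the loss gradient.
patch_untargeted = attack.generate(x=x)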

art/estimators/object_detection/pytorch_faster_rcnn.py

Lines changed: 8 additions & 7 deletions
@@ -19,7 +19,7 @@
 This module implements the task specific estimator for Faster R-CNN v3 in PyTorch.
 """
 import logging
-from typing import List, Optional, Tuple, Union, TYPE_CHECKING
+from typing import List, Dict, Optional, Tuple, Union, TYPE_CHECKING
 
 import numpy as np
 
@@ -29,6 +29,7 @@
 
 if TYPE_CHECKING:
     # pylint: disable=C0412
+    import torch
     import torchvision
 
     from art.utils import CLIP_VALUES_TYPE, PREPROCESSING_TYPE
@@ -134,7 +135,7 @@ def __init__(
         self._model.eval()
         self.attack_losses: Tuple[str, ...] = attack_losses
 
-    def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
+    def loss_gradient(self, x: np.ndarray, y: List[Dict[str, np.ndarray]], **kwargs) -> np.ndarray:
         """
         Compute the gradient of the loss function w.r.t. `x`.
 
@@ -158,9 +159,9 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
 
         if y is not None:
             for i, y_i in enumerate(y):
-                y[i]["boxes"] = torch.tensor(y_i["boxes"], dtype=torch.float).to(self._device)
-                y[i]["labels"] = torch.tensor(y_i["labels"], dtype=torch.int64).to(self._device)
-                y[i]["scores"] = torch.tensor(y_i["scores"]).to(self._device)
+                y[i]["boxes"] = torch.from_numpy(y_i["boxes"]).type(torch.float).to(self._device)
+                y[i]["labels"] = torch.from_numpy(y_i["labels"]).type(torch.int64).to(self._device)
+                y[i]["scores"] = torch.from_numpy(y_i["scores"]).to(self._device)
 
         transform = torchvision.transforms.Compose([torchvision.transforms.ToTensor()])
         image_tensor_list = list()
@@ -207,13 +208,13 @@ def loss_gradient(self, x: np.ndarray, y: np.ndarray, **kwargs) -> np.ndarray:
 
         return grads
 
-    def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> np.ndarray:
+    def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> List[Dict[str, "torch.Tensor"]]:
         """
         Perform prediction for a batch of inputs.
 
         :param x: Samples of shape (nb_samples, height, width, nb_channels).
         :param batch_size: Batch size.
-        :return: Predictions of format `List[Dict[Tensor]]`, one for each input image. The
+        :return: Predictions of format `List[Dict[str, Tensor]]`, one for each input image. The
                  fields of the Dict are as follows:
 
                  - boxes (FloatTensor[N, 4]): the predicted boxes in [x1, y1, x2, y2] format, with values \
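
The tightened `loss_gradient` annotation documents the target format this estimator now expects: one dictionary of NumPy arrays per image, which the method converts to device tensors via `torch.from_numpy`. A sketch of a well-formed `y`, with box coordinates and labels chosen only for illustration:

import numpy as np

# One entry per image, matching the new List[Dict[str, np.ndarray]] annotation.
y = [
    {
        "boxes": np.asarray([[10.0, 10.0, 50.0, 50.0]], dtype=np.float32),  # [x1, y1, x2, y2]
        "labels": np.asarray([1], dtype=np.int64),
        "scores": np.asarray([1.0], dtype=np.float32),
    }
]

# Assuming the detector and x from the earlier sketch:
# grads = detector.loss_gradient(x=x[:1], y=y)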

tests/attacks/evasion/test_dpatch.py

Lines changed: 33 additions & 0 deletions
@@ -130,6 +130,39 @@ def test_augment_images_with_patch(random_location, image_format, fix_get_mnist_
     np.testing.assert_array_equal(patched_images[1, 2, :, 0], patched_images_column)
 
 
+def test_exceptions(get_default_mnist_subset, image_dl_estimator):
+    class ObjectDetector(BaseEstimator, LossGradientsMixin, ObjectDetectorMixin):
+
+        clip_values = (0, 1)
+        channels_first = False
+
+        def fit(self):
+            pass
+
+        def loss_gradient(self, x, y, **kwargs):
+            pass
+
+        def predict(self, x, **kwargs):
+            pass
+
+    estimator = ObjectDetector()
+
+    (x_train_mnist, y_train_mnist), (_, _) = get_default_mnist_subset
+
+    attack = DPatch(estimator=estimator, patch_shape=(4, 4, 1), learning_rate=5.0, max_iter=5, batch_size=16,)
+
+    with pytest.raises(ValueError, match="The DPatch attack does not use target labels."):
+        attack.generate(x=x_train_mnist, y=y_train_mnist)
+
+    with pytest.raises(
+        ValueError, match="The target_label as list of integers needs to be of length equal to the number of images in `x`."
+    ):
+        attack.generate(x=x_train_mnist, y=None, target_label=[1, 2, 3])
+
+    with pytest.raises(ValueError, match="The target_label has to be a 1-dimensional array."):
+        attack.generate(x=x_train_mnist, y=None, target_label=np.asarray([[1, 2, 3], [4, 5, 6]]))
+
+
 def test_classifier_type_check_fail():
     backend_test_classifier_type_check_fail(DPatch, [BaseEstimator, LossGradientsMixin, ObjectDetectorMixin])
