Add support for video to AdversarialPatch

Beat Buesser · Beat Buesser · commit 990a8f99bdd7 · 2020-08-20T12:18:44.000+01:00
Signed-off-by: Beat Buesser <beat.buesser@ie.ibm.com>
diff --git a/art/attacks/evasion/adversarial_patch/adversarial_patch_numpy.py b/art/attacks/evasion/adversarial_patch/adversarial_patch_numpy.py
@@ -103,17 +103,31 @@ def __init__(
         self.clip_patch = clip_patch
         self._check_params()
 
-        if len(self.estimator.input_shape) not in [3]:
-            raise ValueError("Wrong input_shape in estimator detected. AdversarialPatch is expecting images as input.")
+        if len(self.estimator.input_shape) not in [3, 4]:
+            raise ValueError(
+                "Unexpected input_shape in estimator detected. AdversarialPatch is expecting images or videos as input."
+            )
 
         self.image_shape = self.estimator.input_shape
 
-        if self.estimator.channels_first:
-            self.i_h = 1
-            self.i_w = 2
-        else:
-            self.i_h = 0
-            self.i_w = 1
+        self.i_h_patch = 0
+        self.i_w_patch = 1
+
+        self.nb_dims = len(self.image_shape)
+        if self.nb_dims == 3:
+            if self.estimator.channels_first:
+                self.i_h = 1
+                self.i_w = 2
+            else:
+                self.i_h = 0
+                self.i_w = 1
+        elif self.nb_dims == 4:
+            if self.estimator.channels_first:
+                self.i_h = 2
+                self.i_w = 3
+            else:
+                self.i_h = 1
+                self.i_w = 2
 
         if self.estimator.channels_first:
             smallest_image_edge = np.minimum(self.image_shape[1], self.image_shape[2])
@@ -246,9 +260,15 @@ def _get_circular_patch_mask(self, sharpness: int = 40) -> np.ndarray:
         pad_w_after = int(self.image_shape[self.i_w] - pad_w_before - mask.shape[self.i_w])
 
         if self.estimator.channels_first:
-            pad_width = ((0, 0), (pad_h_before, pad_h_after), (pad_w_before, pad_w_after))
+            if self.nb_dims == 3:
+                pad_width = ((0, 0), (pad_h_before, pad_h_after), (pad_w_before, pad_w_after))
+            elif self.nb_dims == 4:
+                pad_width = ((0, 0), (0, 0), (pad_h_before, pad_h_after), (pad_w_before, pad_w_after))
         else:
-            pad_width = ((pad_h_before, pad_h_after), (pad_w_before, pad_w_after), (0, 0))
+            if self.nb_dims == 3:
+                pad_width = ((pad_h_before, pad_h_after), (pad_w_before, pad_w_after), (0, 0))
+            elif self.nb_dims == 4:
+                pad_width = ((0, 0), (pad_h_before, pad_h_after), (pad_w_before, pad_w_after), (0, 0))
 
         mask = np.pad(mask, pad_width=pad_width, mode="constant", constant_values=(0, 0),)
 
@@ -291,11 +311,19 @@ def _scale(self, x, scale):
         height = None
         width = None
         if self.estimator.channels_first:
-            zooms = (1.0, scale, scale)
-            height, width = self.patch_shape[1:3]
+            if self.nb_dims == 3:
+                zooms = (1.0, scale, scale)
+                height, width = self.patch_shape[1:3]
+            elif self.nb_dims == 4:
+                zooms = (1.0, 1.0, scale, scale)
+                height, width = self.patch_shape[2:4]
         elif not self.estimator.channels_first:
-            zooms = (scale, scale, 1.0)
-            height, width = self.patch_shape[0:2]
+            if self.nb_dims == 3:
+                zooms = (scale, scale, 1.0)
+                height, width = self.patch_shape[0:2]
+            elif self.nb_dims == 4:
+                zooms = (1.0, scale, scale, 1.0)
+                height, width = self.patch_shape[1:3]
 
         if scale < 1.0:
             scale_h = int(np.round(height * scale))
@@ -306,9 +334,15 @@ def _scale(self, x, scale):
             x_out = np.zeros_like(x)
 
             if self.estimator.channels_first:
-                x_out[:, top : top + scale_h, left : left + scale_w] = zoom(x, zoom=zooms, order=1)
+                if self.nb_dims == 3:
+                    x_out[:, top : top + scale_h, left : left + scale_w] = zoom(x, zoom=zooms, order=1)
+                elif self.nb_dims == 4:
+                    x_out[:, :, top : top + scale_h, left : left + scale_w] = zoom(x, zoom=zooms, order=1)
             else:
-                x_out[top : top + scale_h, left : left + scale_w, :] = zoom(x, zoom=zooms, order=1)
+                if self.nb_dims == 3:
+                    x_out[top : top + scale_h, left : left + scale_w, :] = zoom(x, zoom=zooms, order=1)
+                elif self.nb_dims == 4:
+                    x_out[:, top : top + scale_h, left : left + scale_w, :] = zoom(x, zoom=zooms, order=1)
 
         elif scale > 1.0:
             scale_h = int(np.round(height / scale)) + 1
@@ -317,17 +351,29 @@ def _scale(self, x, scale):
             left = (width - scale_w) // 2
 
             if self.estimator.channels_first:
-                x_out = zoom(x[:, top : top + scale_h, left : left + scale_w], zoom=zooms, order=1)
+                if self.nb_dims == 3:
+                    x_out = zoom(x[:, top : top + scale_h, left : left + scale_w], zoom=zooms, order=1)
+                elif self.nb_dims == 4:
+                    x_out = zoom(x[:, :, top : top + scale_h, left : left + scale_w], zoom=zooms, order=1)
             else:
-                x_out = zoom(x[top : top + scale_h, left : left + scale_w, :], zoom=zooms, order=1)
+                if self.nb_dims == 3:
+                    x_out = zoom(x[top : top + scale_h, left : left + scale_w, :], zoom=zooms, order=1)
+                elif self.nb_dims == 4:
+                    x_out = zoom(x[:, top : top + scale_h, left : left + scale_w, :], zoom=zooms, order=1)
 
             cut_top = (x_out.shape[self.i_h] - height) // 2
             cut_left = (x_out.shape[self.i_w] - width) // 2
 
             if self.estimator.channels_first:
-                x_out = x_out[:, cut_top : cut_top + height, cut_left : cut_left + width]
+                if self.nb_dims == 3:
+                    x_out = x_out[:, cut_top : cut_top + height, cut_left : cut_left + width]
+                elif self.nb_dims == 4:
+                    x_out = x_out[:, :, cut_top : cut_top + height, cut_left : cut_left + width]
             else:
-                x_out = x_out[cut_top : cut_top + height, cut_left : cut_left + width, :]
+                if self.nb_dims == 3:
+                    x_out = x_out[cut_top : cut_top + height, cut_left : cut_left + width, :]
+                elif self.nb_dims == 4:
+                    x_out = x_out[:, cut_top : cut_top + height, cut_left : cut_left + width, :]
 
         else:
             x_out = x
@@ -338,9 +384,15 @@ def _scale(self, x, scale):
 
     def _shift(self, x, shift_h, shift_w):
         if self.estimator.channels_first:
-            shift_hw = (0, shift_h, shift_w)
+            if self.nb_dims == 3:
+                shift_hw = (0, shift_h, shift_w)
+            elif self.nb_dims == 4:
+                shift_hw = (0, 0, shift_h, shift_w)
         else:
-            shift_hw = (shift_h, shift_w, 0)
+            if self.nb_dims == 3:
+                shift_hw = (shift_h, shift_w, 0)
+            elif self.nb_dims == 4:
+                shift_hw = (0, shift_h, shift_w, 0)
         return shift(x, shift=shift_hw, order=1)
 
     def _random_transformation(self, patch, scale):
diff --git a/art/attacks/evasion/adversarial_patch/adversarial_patch_tensorflow.py b/art/attacks/evasion/adversarial_patch/adversarial_patch_tensorflow.py
@@ -108,6 +108,9 @@ def __init__(
         if self.estimator.channels_first:
             raise ValueError("Color channel needs to be in last dimension.")
 
+        self.i_h_patch = 0
+        self.i_w_patch = 1
+
         self.nb_dims = len(self.image_shape)
         if self.nb_dims == 3:
             self.i_h = 0
@@ -197,7 +200,7 @@ def _get_circular_patch_mask(self, nb_samples: int, sharpness: int = 40) -> "tf.
         """
         import tensorflow as tf  # lgtm [py/repeated-import]
 
-        diameter = np.minimum(self.patch_shape[self.i_h], self.patch_shape[self.i_w])
+        diameter = np.minimum(self.patch_shape[self.i_h_patch], self.patch_shape[self.i_w_patch])
 
         x = np.linspace(-1, 1, diameter)
         y = np.linspace(-1, 1, diameter)
@@ -230,11 +233,11 @@ def _random_overlay(self, images: np.ndarray, patch: np.ndarray, scale: Optional
             name=None,
         )
 
-        pad_h_before = int((self.image_shape[self.i_h] - image_mask.shape[self.i_h + 1]) / 2)
-        pad_h_after = int(self.image_shape[self.i_h] - pad_h_before - image_mask.shape[self.i_h + 1])
+        pad_h_before = int((self.image_shape[self.i_h] - image_mask.shape[self.i_h_patch + 1]) / 2)
+        pad_h_after = int(self.image_shape[self.i_h] - pad_h_before - image_mask.shape[self.i_h_patch + 1])
 
-        pad_w_before = int((self.image_shape[self.i_w] - image_mask.shape[self.i_w + 1]) / 2)
-        pad_w_after = int(self.image_shape[self.i_w] - pad_w_before - image_mask.shape[self.i_w + 1])
+        pad_w_before = int((self.image_shape[self.i_w] - image_mask.shape[self.i_w_patch + 1]) / 2)
+        pad_w_after = int(self.image_shape[self.i_w] - pad_w_before - image_mask.shape[self.i_w_patch + 1])
 
         image_mask = tf.pad(
             image_mask,
@@ -294,8 +297,8 @@ def _random_overlay(self, images: np.ndarray, patch: np.ndarray, scale: Optional
             a0, a1 = xform_matrix[0]
             b0, b1 = xform_matrix[1]
 
-            x_origin = float(self.image_shape[1]) / 2
-            y_origin = float(self.image_shape[0]) / 2
+            x_origin = float(self.image_shape[self.i_w]) / 2
+            y_origin = float(self.image_shape[self.i_h]) / 2
 
             x_origin_shifted, y_origin_shifted = np.matmul(xform_matrix, np.array([x_origin, y_origin]))
 
@@ -307,10 +310,16 @@ def _random_overlay(self, images: np.ndarray, patch: np.ndarray, scale: Optional
 
             transform_vectors.append(np.array([a0, a1, a2, b0, b1, b2, 0, 0]).astype(np.float32))
 
-        image_mask = tfa.image.transform(image_mask, transform_vectors, "BILINEAR", output_shape=self.image_shape[:2])
-        padded_patch = tfa.image.transform(
-            padded_patch, transform_vectors, "BILINEAR", output_shape=self.image_shape[:2]
-        )
+        image_mask = tfa.image.transform(image_mask, transform_vectors, "BILINEAR",)
+        padded_patch = tfa.image.transform(padded_patch, transform_vectors, "BILINEAR",)
+
+        if self.nb_dims == 4:
+            image_mask = tf.stack([image_mask] * 15, axis=1)
+            image_mask = tf.cast(image_mask, images.dtype)
+
+            padded_patch = tf.stack([padded_patch] * 15, axis=1)
+            padded_patch = tf.cast(padded_patch, images.dtype)
+
         inverted_mask = 1 - image_mask
 
         return images * inverted_mask + padded_patch * image_mask
diff --git a/tests/attacks/test_adversarial_patch.py b/tests/attacks/test_adversarial_patch.py
@@ -170,7 +170,7 @@ def test_failure_feature_vectors(self):
             _ = AdversarialPatch(classifier=classifier)
 
         self.assertIn(
-            "Wrong input_shape in estimator detected. AdversarialPatch is expecting images as input.",
+            "Unexpected input_shape in estimator detected. AdversarialPatch is expecting images or videos as input.",
             str(context.exception),
         )
 

Original file line number	Diff line number	Diff line change
`@@ -170,7 +170,7 @@ def test_failure_feature_vectors(self):`
`170`	`170`	`_ = AdversarialPatch(classifier=classifier)`
`171`	`171`
`172`	`172`	`self.assertIn(`
`173`		`- "Wrong input_shape in estimator detected. AdversarialPatch is expecting images as input.",`
	`173`	`+ "Unexpected input_shape in estimator detected. AdversarialPatch is expecting images or videos as input.",`
`174`	`174`	`str(context.exception),`
`175`	`175`	`)`
`176`	`176`