Commit 6d98749

Merge pull request #1678 from GiulioZizzo/randomised_smoothing_fix: Randomised smoothing fix

2 parents 67fa652 + f6b4f91

File tree

4 files changed: +120 additions, -24 deletions

art/estimators/certification/randomized_smoothing/numpy.py

Lines changed: 15 additions & 1 deletion
@@ -25,11 +25,14 @@
 import logging
 from typing import List, Union, TYPE_CHECKING, Tuple

+import warnings
 import numpy as np

+from art.config import ART_NUMPY_DTYPE
 from art.estimators.estimator import BaseEstimator, LossGradientsMixin, NeuralNetworkMixin
 from art.estimators.certification.randomized_smoothing.randomized_smoothing import RandomizedSmoothingMixin
 from art.estimators.classification import ClassifierMixin, ClassGradientsMixin
+from art.defences.preprocessor.gaussian_augmentation import GaussianAugmentation

 if TYPE_CHECKING:
     from art.utils import CLASSIFIER_NEURALNETWORK_TYPE
@@ -69,6 +72,12 @@ def __init__(
         :param scale: Standard deviation of Gaussian noise added.
         :param alpha: The failure probability of smoothing
         """
+        if classifier.preprocessing_defences is not None:
+            warnings.warn(
+                "\n With the current backend Gaussian noise will be added by Randomized Smoothing "
+                "BEFORE the application of preprocessing defences. Please ensure this conforms to your use case.\n"
+            )
+
         super().__init__(
             model=classifier.model,
             channels_first=classifier.channels_first,
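The warning above only fires when the wrapped classifier carries preprocessing defences. A minimal, self-contained sketch of that guard, using a hypothetical stand-in class rather than ART's real constructor:

    import warnings

    class _StubClassifier:  # hypothetical stand-in for an ART classifier
        preprocessing_defences = ["some_defence"]

    classifier = _StubClassifier()
    # Mirrors the check added to the NumPy backend's __init__ above
    if classifier.preprocessing_defences is not None:
        warnings.warn(
            "\n With the current backend Gaussian noise will be added by Randomized Smoothing "
            "BEFORE the application of preprocessing defences. Please ensure this conforms to your use case.\n"
        )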
@@ -112,7 +121,12 @@ def _fit_classifier(self, x: np.ndarray, y: np.ndarray, batch_size: int, nb_epoc
         :param kwargs: Dictionary of framework-specific arguments. This parameter is not currently supported for PyTorch
             and providing it takes no effect.
         """
-        return self.classifier.fit(x, y, batch_size=batch_size, nb_epochs=nb_epochs, **kwargs)
+
+        g_a = GaussianAugmentation(sigma=self.scale, augmentation=False)
+        for _ in range(nb_epochs):
+            x_rs, _ = g_a(x)
+            x_rs = x_rs.astype(ART_NUMPY_DTYPE)
+            self.classifier.fit(x_rs, y, batch_size=batch_size, nb_epochs=1, **kwargs)

     def loss_gradient(  # pylint: disable=W0221
         self, x: np.ndarray, y: np.ndarray, training_mode: bool = False, **kwargs
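Why `_fit_classifier` now loops per epoch: with `augmentation=False`, `GaussianAugmentation` returns a same-sized noisy copy of the input, so calling it inside the epoch loop gives the model a fresh noise draw every epoch instead of one fixed noisy dataset. A minimal sketch (toy shapes and sigma value are assumptions):

    import numpy as np
    from art.defences.preprocessor.gaussian_augmentation import GaussianAugmentation

    x = np.zeros((4, 8), dtype=np.float32)  # hypothetical toy inputs
    g_a = GaussianAugmentation(sigma=0.25, augmentation=False)

    x_epoch_1, _ = g_a(x)  # noise draw for epoch 1
    x_epoch_2, _ = g_a(x)  # independent noise draw for epoch 2
    assert x_epoch_1.shape == x.shape
    assert not np.allclose(x_epoch_1, x_epoch_2)  # different draws each call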

art/estimators/certification/randomized_smoothing/pytorch.py

Lines changed: 65 additions & 9 deletions
@@ -25,11 +25,15 @@
 import logging
 from typing import List, Optional, Tuple, Union, TYPE_CHECKING

+import warnings
+import random
+from tqdm import tqdm
 import numpy as np

 from art.config import ART_NUMPY_DTYPE
 from art.estimators.classification.pytorch import PyTorchClassifier
 from art.estimators.certification.randomized_smoothing.randomized_smoothing import RandomizedSmoothingMixin
+from art.utils import check_and_transform_label_format

 if TYPE_CHECKING:
     # pylint: disable=C0412
@@ -94,6 +98,12 @@ def __init__(
         :param scale: Standard deviation of Gaussian noise added.
         :param alpha: The failure probability of smoothing.
         """
+        if preprocessing_defences is not None:
+            warnings.warn(
+                "\n With the current backend (Pytorch) Gaussian noise will be added by Randomized Smoothing "
+                "AFTER the application of preprocessing defences. Please ensure this conforms to your use case.\n"
+            )
+
         super().__init__(
             model=model,
             loss=loss,
@@ -126,26 +136,72 @@ def fit(  # pylint: disable=W0221
         batch_size: int = 128,
         nb_epochs: int = 10,
         training_mode: bool = True,
-        **kwargs
-    ):
+        **kwargs,
+    ) -> None:
         """
         Fit the classifier on the training set `(x, y)`.

         :param x: Training data.
-        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
-                  (nb_samples,).
-        :param batch_size: Batch size.
-        :key nb_epochs: Number of epochs to use for training
+        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or index labels of
+                  shape (nb_samples,).
+        :param batch_size: Size of batches.
+        :param nb_epochs: Number of epochs to use for training.
+        :param training_mode: `True` for model set to training mode and `False` for model set to evaluation mode.
         :param kwargs: Dictionary of framework-specific arguments. This parameter is not currently supported for PyTorch
             and providing it takes no effect.
-        :type kwargs: `dict`
-        :return: `None`
         """
+        import torch  # lgtm [py/repeated-import]

         # Set model mode
         self._model.train(mode=training_mode)

-        RandomizedSmoothingMixin.fit(self, x, y, batch_size=batch_size, nb_epochs=nb_epochs, **kwargs)
+        if self._optimizer is None:  # pragma: no cover
+            raise ValueError("An optimizer is needed to train the model, but none was provided.")
+
+        y = check_and_transform_label_format(y, self.nb_classes)
+
+        # Apply preprocessing
+        x_preprocessed, y_preprocessed = self._apply_preprocessing(x, y, fit=True)
+
+        # Check label shape
+        y_preprocessed = self.reduce_labels(y_preprocessed)
+
+        num_batch = int(np.ceil(len(x_preprocessed) / float(batch_size)))
+        ind = np.arange(len(x_preprocessed))
+        std = torch.tensor(self.scale).to(self._device)
+        # Start training
+        for _ in tqdm(range(nb_epochs)):
+            # Shuffle the examples
+            random.shuffle(ind)
+
+            # Train for one epoch
+            for m in range(num_batch):
+                i_batch = torch.from_numpy(x_preprocessed[ind[m * batch_size : (m + 1) * batch_size]]).to(self._device)
+                o_batch = torch.from_numpy(y_preprocessed[ind[m * batch_size : (m + 1) * batch_size]]).to(self._device)
+
+                # Add random noise for randomized smoothing
+                i_batch = i_batch + torch.randn_like(i_batch, device=self._device) * std
+
+                # Zero the parameter gradients
+                self._optimizer.zero_grad()
+
+                # Perform prediction
+                model_outputs = self._model(i_batch)
+
+                # Form the loss function
+                loss = self._loss(model_outputs[-1], o_batch)  # lgtm [py/call-to-non-callable]
+
+                # Do training
+                if self._use_amp:  # pragma: no cover
+                    from apex import amp  # pylint: disable=E0611
+
+                    with amp.scale_loss(loss, self._optimizer) as scaled_loss:
+                        scaled_loss.backward()
+
+                else:
+                    loss.backward()
+
+                self._optimizer.step()

     def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> np.ndarray:  # type: ignore
         """

art/estimators/certification/randomized_smoothing/randomized_smoothing.py

Lines changed: 1 addition & 4 deletions
@@ -31,7 +31,6 @@
 from tqdm.auto import tqdm

 from art.config import ART_NUMPY_DTYPE
-from art.defences.preprocessor.gaussian_augmentation import GaussianAugmentation

 logger = logging.getLogger(__name__)

@@ -141,9 +140,7 @@ def fit(self, x: np.ndarray, y: np.ndarray, batch_size: int = 128, nb_epochs: in
         :param kwargs: Dictionary of framework-specific arguments. This parameter is not currently supported for PyTorch
             and providing it takes no effect.
         """
-        g_a = GaussianAugmentation(sigma=self.scale, augmentation=False)
-        x_rs, _ = g_a(x)
-        self._fit_classifier(x_rs, y, batch_size=batch_size, nb_epochs=nb_epochs, **kwargs)
+        self._fit_classifier(x, y, batch_size=batch_size, nb_epochs=nb_epochs, **kwargs)

     def certify(self, x: np.ndarray, n: int, batch_size: int = 32) -> Tuple[np.ndarray, np.ndarray]:
         """

art/estimators/certification/randomized_smoothing/tensorflow.py

Lines changed: 39 additions & 10 deletions
@@ -25,10 +25,13 @@
 import logging
 from typing import Callable, List, Optional, Tuple, Union, TYPE_CHECKING

+import warnings
+from tqdm import tqdm
 import numpy as np

 from art.estimators.classification.tensorflow import TensorFlowV2Classifier
 from art.estimators.certification.randomized_smoothing.randomized_smoothing import RandomizedSmoothingMixin
+from art.utils import check_and_transform_label_format

 if TYPE_CHECKING:
     # pylint: disable=C0412
@@ -91,6 +94,12 @@ def __init__(
         :param scale: Standard deviation of Gaussian noise added.
         :param alpha: The failure probability of smoothing.
         """
+        if preprocessing_defences is not None:
+            warnings.warn(
+                "\nWith the current backend (Tensorflow), Gaussian noise will be added by Randomized Smoothing "
+                "AFTER the application of preprocessing defences. Please ensure this conforms to your use case.\n"
+            )
+
         super().__init__(
             model=model,
             nb_classes=nb_classes,
@@ -113,21 +122,41 @@ def _predict_classifier(self, x: np.ndarray, batch_size: int, training_mode: boo
     def _fit_classifier(self, x: np.ndarray, y: np.ndarray, batch_size: int, nb_epochs: int, **kwargs) -> None:
         return TensorFlowV2Classifier.fit(self, x, y, batch_size=batch_size, nb_epochs=nb_epochs, **kwargs)

-    def fit(self, x: np.ndarray, y: np.ndarray, batch_size: int = 128, nb_epochs: int = 10, **kwargs):
+    def fit(self, x: np.ndarray, y: np.ndarray, batch_size: int = 128, nb_epochs: int = 10, **kwargs) -> None:
         """
         Fit the classifier on the training set `(x, y)`.

         :param x: Training data.
-        :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes) or indices of shape
-                  (nb_samples,).
-        :param batch_size: Batch size.
-        :key nb_epochs: Number of epochs to use for training
-        :param kwargs: Dictionary of framework-specific arguments. This parameter is not currently supported for PyTorch
-            and providing it takes no effect.
-        :type kwargs: `dict`
-        :return: `None`
+        :param y: Labels, one-hot-encoded of shape (nb_samples, nb_classes) or index labels of
+                  shape (nb_samples,).
+        :param batch_size: Size of batches.
+        :param nb_epochs: Number of epochs to use for training.
+        :param kwargs: Dictionary of framework-specific arguments. This parameter is not currently supported for
+                       TensorFlow and providing it takes no effect.
         """
-        RandomizedSmoothingMixin.fit(self, x, y, batch_size=batch_size, nb_epochs=nb_epochs, **kwargs)
+        import tensorflow as tf  # lgtm [py/repeated-import]
+
+        if self._train_step is None:  # pragma: no cover
+            raise TypeError(
+                "The training function `train_step` is required for fitting a model but it has not been defined."
+            )
+
+        y = check_and_transform_label_format(y, self.nb_classes)
+
+        # Apply preprocessing
+        x_preprocessed, y_preprocessed = self._apply_preprocessing(x, y, fit=True)
+
+        # Check label shape
+        if self._reduce_labels:
+            y_preprocessed = np.argmax(y_preprocessed, axis=1)
+
+        train_ds = tf.data.Dataset.from_tensor_slices((x_preprocessed, y_preprocessed)).shuffle(10000).batch(batch_size)
+
+        for _ in tqdm(range(nb_epochs)):
+            for images, labels in train_ds:
+                # Add random noise for randomized smoothing
+                images += tf.random.normal(shape=images.shape, mean=0.0, stddev=self.scale)
+                self._train_step(self.model, images, labels)

     def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> np.ndarray:  # type: ignore
         """
