Integrate preprocessing defences and classifier

Irina Nicolae · Irina Nicolae · commit 93a583902c7c · 2018-05-08T13:35:27.000+01:00
diff --git a/art/attacks/fast_gradient_unittest.py b/art/attacks/fast_gradient_unittest.py
@@ -60,7 +60,7 @@ def test_mnist(self):
 
     def _test_backend_mnist(self, classifier):
         # Get MNIST
-        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
+        (x_train, y_train), (x_test, y_test) = self.mnist
         x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
         x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
 
@@ -130,49 +130,36 @@ def _test_backend_mnist(self, classifier):
         acc = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
         print('\nAccuracy on adversarial test examples with L2 norm: %.2f%%' % (acc * 100))
 
-    # def test_with_preprocessing(self):
-    #
-    #     session = tf.Session()
-    #     k.set_session(session)
-    #
-    #     comp_params = {"loss": 'categorical_crossentropy',
-    #                    "optimizer": 'adam',
-    #                    "metrics": ['accuracy']}
-    #
-    #     # get MNIST
-    #     batch_size, nb_train, nb_test = 100, 1000, 100
-    #     (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
-    #     X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
-    #     X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
-    #     im_shape = X_train[0].shape
-    #
-    #     # get classifier
-    #     classifier = CNN(im_shape, act="relu", defences=["featsqueeze1"])
-    #     classifier.compile(comp_params)
-    #     classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size)
-    #     scores = classifier.evaluate(X_train, Y_train)
-    #     print("\naccuracy on training set: %.2f%%" % (scores[1] * 100))
-    #     scores = classifier.evaluate(X_test, Y_test)
-    #     print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))
-    #
-    #     attack = FastGradientMethod(classifier, eps=1)
-    #     X_train_adv = attack.generate(X_train)
-    #     X_test_adv = attack.generate(X_test)
-    #
-    #     self.assertFalse((X_train == X_train_adv).all())
-    #     self.assertFalse((X_test == X_test_adv).all())
-    #
-    #     train_y_pred = get_labels_np_array(classifier.predict(X_train_adv))
-    #     test_y_pred = get_labels_np_array(classifier.predict(X_test_adv))
-    #
-    #     self.assertFalse((Y_train == train_y_pred).all())
-    #     self.assertFalse((Y_test == test_y_pred).all())
-    #
-    #     scores = classifier.evaluate(X_train_adv, Y_train)
-    #     print('\naccuracy on adversarial train examples: %.2f%%' % (scores[1] * 100))
-    #
-    #     scores = classifier.evaluate(X_test_adv, Y_test)
-    #     print('\naccuracy on adversarial test examples: %.2f%%' % (scores[1] * 100))
+    def test_with_defences(self):
+        """Check that FastGradientMethod still perturbs inputs of a classifier using feature squeezing."""
+        # Get MNIST
+        (x_train, y_train), (x_test, y_test) = self.mnist
+        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
+        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
+
+        # Wrap the ready-trained Keras model; the defences are passed as a list of names,
+        # which is the form the classifier's defence parser iterates over
+        model = self.classifier_k._model
+        classifier = KerasClassifier((0, 1), model, defences=['featsqueeze1'])
+
+        attack = FastGradientMethod(classifier, eps=1)
+        x_train_adv = attack.generate(x_train)
+        x_test_adv = attack.generate(x_test)
+
+        self.assertFalse((x_train == x_train_adv).all())
+        self.assertFalse((x_test == x_test_adv).all())
+
+        # Predict once per set and reuse the outputs for both the assertions and the accuracy
+        train_preds = classifier.predict(x_train_adv)
+        test_preds = classifier.predict(x_test_adv)
+        self.assertFalse((y_train == get_labels_np_array(train_preds)).all())
+        self.assertFalse((y_test == get_labels_np_array(test_preds)).all())
+
+        acc = np.sum(np.argmax(train_preds, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
+        print('\nAccuracy on adversarial train examples with feature squeezing: %.2f%%' % (acc * 100))
+
+        acc = np.sum(np.argmax(test_preds, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
+        print('\nAccuracy on adversarial test examples with feature squeezing: %.2f%%' % (acc * 100))
 
     @staticmethod
     def _cnn_mnist_tf(input_shape):
diff --git a/art/classifiers/classifier.py b/art/classifiers/classifier.py
@@ -3,6 +3,8 @@
 import abc
 import sys
 
+# TODO Add tests for defences on classifier
+
 # Ensure compatibility with Python 2 and 3 when using ABCMeta
 if sys.version_info >= (3, 4):
     ABC = abc.ABC
@@ -14,7 +16,7 @@ class Classifier(ABC):
     """
     Base class for all classifiers.
     """
-    def __init__(self, clip_values):
+    def __init__(self, clip_values, defences=None):
         """
         Initialize a `Classifier` object.
 
@@ -23,6 +25,7 @@ def __init__(self, clip_values):
         :type clip_values: `tuple`
         """
         self._clip_values = clip_values
+        self._parse_defences(defences)
 
     def predict(self, inputs, logits=False):
         """
@@ -110,3 +113,54 @@ def loss_gradient(self, inputs, labels):
         :rtype: `np.ndarray`
         """
         raise NotImplementedError
+
+    def _parse_defences(self, defences):
+        """Create the defence objects named in `defences` (a `str` or `list(str)`) and store them on `self`."""
+        import re
+
+        self.defences = defences
+        # Wrap a single name in a list; a bare string would otherwise be iterated character by character
+        names = [defences] if isinstance(defences, str) else (defences or [])
+
+        for d in names:
+            if d.startswith('featsqueeze'):
+                # The name must end in exactly one bit-depth digit; validate before importing so that
+                # only a malformed name (never an import failure) is reported as a ValueError
+                if not re.match(r'featsqueeze[1-8]\Z', d):
+                    raise ValueError('You must specify the bit depth for feature squeezing: featsqueeze[1-8]')
+                from art.defences import FeatureSqueezing
+                self.feature_squeeze = FeatureSqueezing(bit_depth=int(d[-1]))
+
+            # Add label smoothing
+            elif d == 'labsmooth':
+                from art.defences import LabelSmoothing
+                self.label_smooth = LabelSmoothing()
+
+            # Add spatial smoothing
+            elif d == 'smooth':
+                from art.defences import SpatialSmoothing
+                self.smooth = SpatialSmoothing()
+
+    def _apply_defences_fit(self, inputs, outputs):
+        """
+        Apply the training-time defences that are set: label smoothing, then feature squeezing.
+
+        :return: The `(inputs, outputs)` pair after the defences present on `self` were applied.
+        """
+        if hasattr(self, 'label_smooth'):
+            _, outputs = self.label_smooth(None, outputs)
+        if hasattr(self, 'feature_squeeze'):
+            inputs = self.feature_squeeze(inputs)
+
+        return inputs, outputs
+
+    def _apply_defences_predict(self, inputs):
+        """
+        Run `inputs` through the prediction-time defences that exist on this classifier.
+
+        :return: `inputs` after feature squeezing and then spatial smoothing, each applied only if set.
+        """
+        for defence in ('feature_squeeze', 'smooth'):
+            if hasattr(self, defence):
+                inputs = getattr(self, defence)(inputs)
+        return inputs
diff --git a/art/classifiers/keras.py b/art/classifiers/keras.py
@@ -9,7 +9,7 @@ class KerasClassifier(Classifier):
     """
     The supported backends for Keras are TensorFlow and Theano.
     """
-    def __init__(self, clip_values, model, use_logits=False):
+    def __init__(self, clip_values, model, use_logits=False, defences=None):
         """
         Create a `Classifier` instance from a Keras model. Assumes the `model` passed as argument is compiled.
 
@@ -20,11 +20,13 @@ def __init__(self, clip_values, model, use_logits=False):
         :type model: `keras.models.Sequential`
         :param use_logits: True if the output of the model are the logits
         :type use_logits: `bool`
+        :param defences: Defences to be activated with the classifier.
+        :type defences: `str` or `list(str)`
         """
         import keras.backend as k
 
         # TODO Generalize loss function?
-        super(KerasClassifier, self).__init__(clip_values)
+        super(KerasClassifier, self).__init__(clip_values, defences)
 
         self._model = model
         self._input = model.input
@@ -107,8 +109,11 @@ def predict(self, inputs, logits=False):
         :rtype: `np.ndarray`
         """
         import keras.backend as k
-
         k.set_learning_phase(0)
+
+        # Apply defences
+        inputs = self._apply_defences_predict(inputs)
+
         preds = self._preds([inputs])[0]
         if not logits:
             exp = np.exp(preds - np.max(preds, axis=1, keepdims=True))
@@ -131,8 +136,11 @@ def fit(self, inputs, outputs, batch_size=128, nb_epochs=20):
         :return: `None`
         """
         import keras.backend as k
-
         k.set_learning_phase(1)
+
+        # Apply defences
+        inputs, outputs = self._apply_defences_fit(inputs, outputs)
+
         gen = generator(inputs, outputs, batch_size)
         self._model.fit_generator(gen, steps_per_epoch=inputs.shape[0] / batch_size, epochs=nb_epochs)
 
diff --git a/art/classifiers/keras_unittest.py b/art/classifiers/keras_unittest.py
@@ -6,10 +6,9 @@
 from keras.layers import Dense, Activation, Flatten, Conv2D, MaxPooling2D, Dropout
 import numpy as np
 import unittest
-import shutil
 
 from art.classifiers import KerasClassifier
-from art.utils import load_mnist, make_directory
+from art.utils import load_mnist
 
 BATCH_SIZE = 10
 NB_TRAIN = 500
diff --git a/art/classifiers/tensorflow.py b/art/classifiers/tensorflow.py
@@ -10,7 +10,8 @@ class TFClassifier(Classifier):
     """
     This class implements a classifier with the Tensorflow framework.
     """
-    def __init__(self, clip_values, input_ph, logits, output_ph=None, train=None, loss=None, learning=None, sess=None):
+    def __init__(self, clip_values, input_ph, logits, output_ph=None, train=None, loss=None, learning=None, sess=None,
+                 defences=None):
         """
         Initialization specifically for the Tensorflow-based implementation.
 
@@ -32,6 +33,8 @@ def __init__(self, clip_values, input_ph, logits, output_ph=None, train=None, lo
         :type learning: `tf.Placeholder` of type bool.
         :param sess: Computation session.
         :type sess: `tf.Session`
+        :param defences: Defences to be activated with the classifier.
+        :type defences: `str` or `list(str)`
         """
         import tensorflow as tf
 
@@ -73,6 +76,9 @@ def predict(self, inputs, logits=False):
         """
         import tensorflow as tf
 
+        # Apply defences
+        inputs = self._apply_defences_predict(inputs)
+
         # Create feed_dict
         fd = {self._input_ph: inputs}
         if self._learning is not None:
@@ -104,6 +110,9 @@ def fit(self, inputs, outputs, batch_size=128, nb_epochs=10):
         if self._train is None or self._output_ph is None:
             raise ValueError("Need the training objective and the output placeholder to train the model.")
 
+        # Apply defences
+        inputs, outputs = self._apply_defences_fit(inputs, outputs)
+
         num_batch = int(np.ceil(len(inputs) / batch_size))
         ind = np.arange(len(inputs))