Create Preprocessor abstract class for all preprocessing defenses

Irina Nicolae · Irina Nicolae · commit e31cce6cedcd · 2018-02-27T16:45:58.000Z
Also:
  - convert feature squeezing and label smoothing to classes extending Preprocessor
  - change Classifier to use these defenses
diff --git a/src/classifiers/classifier.py b/src/classifiers/classifier.py
@@ -7,7 +7,8 @@
 from keras.layers import Activation
 import tensorflow as tf
 
-from src.defences.preprocessing import label_smoothing, feature_squeezing, tf_feature_squeezing
+from src.defences.feature_squeezing import FeatureSqueezing
+from src.defences.label_smoothing import LabelSmoothing
 from src.layers.activations import BoundedReLU
 
 
@@ -59,14 +60,14 @@ def fit(self, inputs_val, outputs_val, **kwargs):
         :param kwargs: Other parameters
         """
         # Apply label smoothing if option is set
-        if self.label_smooth:
-            y = label_smoothing(outputs_val)
+        if hasattr(self, 'label_smooth'):
+            _, y = self.label_smooth(None, outputs_val)
         else:
             y = outputs_val
 
         # Apply feature squeezing if option is set
-        if self.feature_squeeze:
-            x = feature_squeezing(inputs_val, self.bit_depth)
+        if hasattr(self, 'feature_squeeze'):
+            x = self.feature_squeeze(inputs_val)
         else:
             x = inputs_val
 
@@ -81,8 +82,8 @@ def predict(self, x_val, **kwargs):
         :param kwargs: Other parameters
         :return: Predictions for test set
         """
-        if self.feature_squeeze:
-            x = feature_squeezing(x_val, self.bit_depth)
+        if hasattr(self, 'feature_squeeze'):
+            x = self.feature_squeeze(x_val)
 
         else:
             x = x_val
@@ -99,8 +100,8 @@ def evaluate(self, x_val, y_val, **kwargs):
         :return: The accuracy of the model on (x_val, y_val)
         :rtype: float
         """
-        if self.feature_squeeze:
-            x = feature_squeezing(x_val, self.bit_depth)
+        if hasattr(self, 'feature_squeeze'):
+            x = self.feature_squeeze(x_val)
         else:
             x = x_val
 
@@ -143,8 +144,6 @@ def _parse_defences(self, defences):
 
         :param defences: (string) names of the defences to add, supports "featsqueeze[1-8]" and "labsmooth"
         """
-        self.label_smooth = False
-        self.feature_squeeze = False
         self.defences = defences
 
         if defences:
@@ -153,16 +152,15 @@ def _parse_defences(self, defences):
             for d in defences:
                 # Add feature squeezing
                 if pattern.match(d):
-                    self.feature_squeeze = True
-
                     try:
-                        self.bit_depth = int(d[-1])
+                        bit_depth = int(d[-1])
+                        self.feature_squeeze = FeatureSqueezing(bit_depth=bit_depth)
                     except:
                         raise ValueError("You must specify the bit depth for feature squeezing: featsqueeze[1-8]")
 
                 # Add label smoothing
                 if d == "labsmooth":
-                    self.label_smooth = True
+                    self.label_smooth = LabelSmoothing()
 
     def _preprocess(self, x):
         """Apply preprocessing to x
diff --git a/src/classifiers/cnn_unittest.py b/src/classifiers/cnn_unittest.py
@@ -130,7 +130,7 @@ def test_save_load_cnn(self):
         scores_loaded = loaded_classifier.evaluate(X_test, Y_test)
         self.assertAlmostEqual(scores, scores_loaded)
 
-    def test_defences(self):
+    def test_feat_squeeze(self):
         session = tf.Session()
         keras.backend.set_session(session)
 
@@ -147,5 +147,23 @@ def test_defences(self):
         scores = classifier.evaluate(X_test, Y_test)
         print("\naccuracy: %.2f%%" % (scores[1] * 100))
 
+    def test_label_smooth(self):
+        # Smoke test: train a CNN with the "labsmooth" defence for one epoch on an MNIST subset and print its accuracy
+        session = tf.Session()
+        keras.backend.set_session(session)
+
+        # get MNIST
+        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
+        X_train, Y_train, X_test, Y_test = X_train[:NB_TRAIN], Y_train[:NB_TRAIN], X_test[:NB_TEST], Y_test[:NB_TEST]
+        im_shape = X_train[0].shape
+
+        classifier = CNN(im_shape, act="relu", defences=["labsmooth"])
+        classifier.compile({'loss': 'categorical_crossentropy', 'optimizer': 'adam', 'metrics': ['accuracy']})
+
+        # Fit the classifier
+        classifier.fit(X_train, Y_train, epochs=1, batch_size=BATCH_SIZE)
+        scores = classifier.evaluate(X_test, Y_test)
+        print("\naccuracy: %.2f%%" % (scores[1] * 100))
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/src/defences/feature_squeezing.py b/src/defences/feature_squeezing.py
@@ -0,0 +1,69 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import numpy as np
+from tensorflow import rint
+
+from src.defences.preprocessor import Preprocessor
+
+
+class FeatureSqueezing(Preprocessor):
+    """
+    Reduces the sensitivity of the features of a sample. Defence method from https://arxiv.org/abs/1704.01155.
+    """
+    params = ['bit_depth']
+
+    def __init__(self, bit_depth=8):
+        """
+        Create an instance of feature squeezing.
+
+        :param bit_depth: (int) The number of bits to encode data on
+        """
+        self.is_fitted = True
+        self.set_params(bit_depth=bit_depth)
+
+    def __call__(self, x_val, bit_depth=None):
+        """
+        Apply feature squeezing to sample x_val.
+
+        :param x_val: (np.ndarray) Sample to squeeze. `x_val` values are supposed to be in the range [0,1]
+        :param bit_depth: (int) Bits to encode data on; if given, replaces the stored value, else the stored one is used
+        :return: Squeezed sample
+        :rtype: np.ndarray
+        """
+        if bit_depth is not None:
+            self.set_params(bit_depth=bit_depth)
+        max_value = int(2 ** self.bit_depth - 1)
+        return np.rint(x_val * max_value) / max_value
+
+    def fit(self, x_val, y_val=None, **kwargs):
+        """No parameters to learn for this method; do nothing."""
+        pass
+
+    def _tf_predict(self, x, bit_depth=None):
+        """
+        Apply feature squeezing on tf.Tensor.
+
+        :param x: (tf.Tensor) Sample to squeeze. Values are supposed to be in the range [0,1]
+        :param bit_depth: (int) Bits to encode data on; if given, replaces the stored value, else the stored one is used
+        :return: Squeezed sample
+        :rtype: tf.Tensor
+        """
+        # As in __call__, an omitted bit_depth must not reset the configured depth to a hard-coded default
+        if bit_depth is not None:
+            self.set_params(bit_depth=bit_depth)
+        max_value = int(2 ** self.bit_depth - 1)
+        return rint(x * max_value) / max_value
+
+    def set_params(self, **kwargs):
+        """Hand the parameters to `Preprocessor.set_params`, then apply the defence-specific check on `bit_depth`.
+
+        :param bit_depth: (int) The number of bits to encode data on
+        :raises ValueError: if `bit_depth` is not an `int` between 1 and 60
+        """
+        # Save defence-specific parameters
+        super(FeatureSqueezing, self).set_params(**kwargs)
+
+        if type(self.bit_depth) is not int or self.bit_depth <= 0 or self.bit_depth > 60:
+            raise ValueError("The bit depth must be between 1 and 60.")
+
+        return True
diff --git a/src/defences/feature_squeezing_unittest.py b/src/defences/feature_squeezing_unittest.py
@@ -1,32 +1,11 @@
-from __future__ import absolute_import, division, print_function
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import unittest
 
 import numpy as np
 import tensorflow as tf
 
-from src.defences.preprocessing import feature_squeezing, label_smoothing, tf_feature_squeezing
-
-
-class TestLabelSmoothing(unittest.TestCase):
-    def test_default(self):
-        m, n = 1000, 20
-        y = np.zeros((m, n))
-        y[(range(m), np.random.choice(range(n), m))] = 1.
-
-        smooth_y = label_smoothing(y)
-        self.assertTrue(np.isclose(np.sum(smooth_y, axis=1), np.ones(m)).all())
-        self.assertTrue((np.max(smooth_y, axis=1) == np.ones(m)*0.9).all())
-
-    def test_customizing(self):
-        m, n = 1000, 20
-        y = np.zeros((m, n))
-        y[(range(m), np.random.choice(range(n), m))] = 1.
-
-        smooth_y = label_smoothing(y, max_value=1./n)
-        self.assertTrue(np.isclose(np.sum(smooth_y, axis=1), np.ones(m)).all())
-        self.assertTrue((np.max(smooth_y, axis=1) == np.ones(m) / n).all())
-        self.assertTrue(np.isclose(smooth_y, np.ones((m, n)) / n).all())
+from src.defences.feature_squeezing import FeatureSqueezing
 
 
 class TestFeatureSqueezing(unittest.TestCase):
@@ -36,7 +15,8 @@ def test_ones(self):
 
         for depth in range(1,50):
             with self.subTest("bit depth = {}".format(depth)):
-                squeezed_x = feature_squeezing(x, depth)
+                preproc = FeatureSqueezing()
+                squeezed_x = preproc(x, depth)
                 self.assertTrue((squeezed_x == 1).all())
 
     def test_random(self):
@@ -45,11 +25,12 @@ def test_random(self):
         x_zero = np.where(x < 0.5)
         x_one = np.where(x >= 0.5)
 
-        squeezed_x = feature_squeezing(x, 1)
+        preproc = FeatureSqueezing()
+        squeezed_x = preproc(x, 1)
         self.assertTrue((squeezed_x[x_zero] == 0.).all())
         self.assertTrue((squeezed_x[x_one] == 1.).all())
 
-        squeezed_x = feature_squeezing(x, 2)
+        squeezed_x = preproc(x, 2)
         self.assertFalse(np.logical_and(0. < squeezed_x, squeezed_x < 0.33).any())
         self.assertFalse(np.logical_and(0.34 < squeezed_x, squeezed_x < 0.66).any())
         self.assertFalse(np.logical_and(0.67 < squeezed_x, squeezed_x < 1.).any())
@@ -59,10 +40,11 @@ def test_tf_feature_squeezing(self):
         m, n = 10, 2
         sess = tf.Session()
         x = tf.ones((m, n))
+        fs = FeatureSqueezing()
 
         for depth in range(1, 10):
             with self.subTest("bit depth = {}".format(depth)):
-                squeezed_x = sess.run(tf_feature_squeezing(x, depth))
+                squeezed_x = sess.run(fs._tf_predict(x, depth))
                 self.assertTrue((squeezed_x == 1).all())
 
         # With placeholders
@@ -71,7 +53,7 @@ def test_tf_feature_squeezing(self):
         x_op = tf.placeholder(tf.float32, shape=[None, 2])
         for depth in range(1, 10):
             with self.subTest("bit depth = {}".format(depth)):
-                squeezed_x = sess.run(tf_feature_squeezing(x_op, depth), feed_dict={x_op: x})
+                squeezed_x = sess.run(fs._tf_predict(x_op, depth), feed_dict={x_op: x})
                 self.assertTrue((squeezed_x == 1).all())
 
 
diff --git a/src/defences/label_smoothing.py b/src/defences/label_smoothing.py
@@ -0,0 +1,56 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from src.defences.preprocessor import Preprocessor
+
+
+class LabelSmoothing(Preprocessor):
+    """
+    Computes a vector of smooth labels from a vector of hard ones. The hard labels have to contain ones for the
+    correct classes and zeros for all the others. The remaining probability mass between `max_value` and 1 is
+    distributed uniformly between the incorrect classes for each instance.
+    """
+    params = ['max_value']
+
+    def __init__(self, max_value=.9):
+        """
+        Create an instance of label smoothing; `max_value` (float) is the value given to the correct label.
+        """
+        self.is_fitted = True
+        self.set_params(max_value=max_value)
+
+    def __call__(self, x_val, y_val, max_value=None):
+        """
+        Apply label smoothing.
+
+        :param x_val: (np.ndarray) Input data, will not be modified by this method
+        :param y_val: (np.ndarray) Original vector of label probabilities (one-vs-rest)
+        :param max_value: (float) Value to assign to the correct label; if given, replaces the stored value
+        :return: (np.ndarray, np.ndarray) Unmodified input data and the vector of smooth probabilities as labels
+        """
+        if max_value is not None:
+            self.set_params(max_value=max_value)
+        min_value = (1 - self.max_value) / (y_val.shape[1] - 1)
+        assert self.max_value >= min_value
+
+        smooth_y = y_val.copy()
+        smooth_y[smooth_y == 1.] = self.max_value
+        smooth_y[smooth_y == 0.] = min_value
+        return x_val, smooth_y
+
+    def fit(self, x_val, y_val=None, **kwargs):
+        """No parameters to learn for this method; do nothing."""
+        pass
+
+    def set_params(self, **kwargs):
+        """Hand the parameters to `Preprocessor.set_params`, then apply the defence-specific check on `max_value`.
+
+        :param max_value: (float) Value to assign to the correct label
+        :raises ValueError: if `max_value` is not in the interval (0, 1]
+        """
+        # Save defence-specific parameters
+        super(LabelSmoothing, self).set_params(**kwargs)
+
+        if self.max_value <= 0 or self.max_value > 1:
+            raise ValueError("The maximum value for correct labels must be between 0 and 1.")
+
+        return True
diff --git a/src/defences/label_smoothing_unittest.py b/src/defences/label_smoothing_unittest.py
@@ -0,0 +1,34 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import unittest
+
+import numpy as np
+
+from src.defences.label_smoothing import LabelSmoothing
+
+
+class TestLabelSmoothing(unittest.TestCase):
+    def test_default(self):
+        m, n = 1000, 20
+        y = np.zeros((m, n))
+        y[(range(m), np.random.choice(range(n), m))] = 1.
+        # Default max_value: each row must still sum to 1 and the correct class must get exactly 0.9
+        ls = LabelSmoothing()
+        _, smooth_y = ls(None, y)
+        self.assertTrue(np.isclose(np.sum(smooth_y, axis=1), np.ones(m)).all())
+        self.assertTrue((np.max(smooth_y, axis=1) == np.ones(m)*0.9).all())
+
+    def test_customizing(self):
+        m, n = 1000, 20
+        y = np.zeros((m, n))
+        y[(range(m), np.random.choice(range(n), m))] = 1.
+        # max_value = 1/n leaves (1 - 1/n) / (n - 1) = 1/n for every other class, so all labels end up uniform
+        ls = LabelSmoothing()
+        _, smooth_y = ls(None, y, max_value=1./n)
+        self.assertTrue(np.isclose(np.sum(smooth_y, axis=1), np.ones(m)).all())
+        self.assertTrue((np.max(smooth_y, axis=1) == np.ones(m) / n).all())
+        self.assertTrue(np.isclose(smooth_y, np.ones((m, n)) / n).all())
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/src/defences/preprocessing.py b/src/defences/preprocessing.py
diff --git a/src/defences/preprocessor.py b/src/defences/preprocessor.py