Merge Clever fix, Gaussian aug and Keras pred batching

Irina Nicolae · Irina Nicolae · commit a9c7531e819e · 2018-05-16T15:54:21.000+01:00
diff --git a/art/classifiers/keras.py b/art/classifiers/keras.py
@@ -114,10 +114,16 @@ def predict(self, inputs, logits=False):
         # Apply defences
         inputs = self._apply_defences_predict(inputs)
 
-        preds = self._preds([inputs])[0]
-        if not logits:
-            exp = np.exp(preds - np.max(preds, axis=1, keepdims=True))
-            preds = exp / np.sum(exp, axis=1, keepdims=True)
+        # Run predictions with batching
+        batch_size = 512
+        preds = np.zeros((inputs.shape[0], self.nb_classes), dtype=np.float32)
+        for b in range(inputs.shape[0] // batch_size + 1):
+            begin, end = b * batch_size,  min((b + 1) * batch_size, inputs.shape[0])
+            preds[begin:end] = self._preds([inputs[begin:end]])[0]
+
+            if not logits:
+                exp = np.exp(preds[begin:end] - np.max(preds[begin:end], axis=1, keepdims=True))
+                preds[begin:end] = exp / np.sum(exp, axis=1, keepdims=True)
 
         return preds
 
@@ -141,13 +147,13 @@ def fit(self, inputs, outputs, batch_size=128, nb_epochs=20):
         # Apply defences
         inputs, outputs = self._apply_defences_fit(inputs, outputs)
 
-        gen = generator(inputs, outputs, batch_size)
+        gen = generator_fit(inputs, outputs, batch_size)
         self._model.fit_generator(gen, steps_per_epoch=inputs.shape[0] / batch_size, epochs=nb_epochs)
 
 
-def generator(data, labels, batch_size=128):
+def generator_fit(data, labels, batch_size=128):
     """
-    Minimal data generator for batching large datasets.
+    Minimal data generator for randomly batching large datasets.
 
     :param data: The data sample to batch.
     :type data: `np.ndarray`
@@ -160,4 +166,4 @@ def generator(data, labels, batch_size=128):
     """
     while True:
         indices = np.random.randint(data.shape[0], size=batch_size)
-        yield data[indices], labels[indices]
+        yield data[indices], labels[indices]
diff --git a/art/defences/__init__.py b/art/defences/__init__.py
@@ -3,5 +3,6 @@
 """
 from art.defences.adversarial_trainer import AdversarialTrainer
 from art.defences.feature_squeezing import FeatureSqueezing
+from art.defences.gaussian_augmentation import GaussianAugmentation
 from art.defences.label_smoothing import LabelSmoothing
 from art.defences.spatial_smoothing import SpatialSmoothing
diff --git a/art/defences/gaussian_augmentation.py b/art/defences/gaussian_augmentation.py
@@ -0,0 +1,88 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from art.defences.preprocessor import Preprocessor
+
+
+class GaussianAugmentation(Preprocessor):
+    """
+    Perform Gaussian augmentation on a dataset.
+    """
+    params = ['sigma', 'ratio']
+
+    def __init__(self, sigma=1., ratio=1.):
+        """
+        Initialize a Gaussian augmentation object.
+
+        :param sigma: Standard deviation of Gaussian noise to be added.
+        :type sigma: `float`
+        :param ratio: Percentage of data augmentation. E.g. for a ratio of 1, the size of the dataset will double.
+        :type ratio: `float`
+        """
+        super(GaussianAugmentation, self).__init__()
+        self._is_fitted = True
+        self.set_params(sigma=sigma, ratio=ratio)
+
+    def __call__(self, x, y=None, sigma=None, ratio=None):
+        """
+        Augment the sample `(x, y)` with Gaussian noise. The result is an extended dataset containing the original
+        sample, as well as the newly created noisy samples.
+
+        :param x: Sample to augment with shape `(batch_size, width, height, depth)`.
+        :type x: `np.ndarray`
+        :param y: Labels for the sample. If this argument is provided, it will be augmented with the corresponding
+                  original labels of each sample point.
+        :param sigma: Standard deviation of Gaussian noise to be added.
+        :type sigma: `float`
+        :param ratio: Percentage of data augmentation. E.g. for a ratio of 1, the size of the dataset will double.
+        :type ratio: `float`
+        :return: The augmented dataset and (if provided) corresponding labels.
+        :rtype: `np.ndarray` or `(np.ndarray, np.ndarray)`
+        """
+        # Set params
+        params = {}
+        if sigma is not None:
+            params['sigma'] = sigma
+
+        if ratio is not None:
+            params['ratio'] = ratio
+
+        if params:
+            self.set_params(**params)
+
+        # Select indices to augment
+        import numpy as np
+        size = int(x.shape[0] * self.ratio)
+        indices = np.random.randint(0, x.shape[0], size=size)
+
+        # Generate noisy samples
+        x_aug = np.random.normal(x[indices], scale=self.sigma, size=(size,) + x[indices].shape[1:])
+        x_aug = np.vstack((x, x_aug))
+
+        if y is not None:
+            y_aug = np.concatenate((y, y[indices]))
+            return x_aug, y_aug
+        else:
+            return x_aug
+
+    def fit(self, x, y=None, **kwargs):
+        """
+        No parameters to learn for this method; do nothing.
+        """
+        pass
+
+    def set_params(self, **kwargs):
+        """
+        Take in a dictionary of parameters and apply defense-specific checks before saving them as attributes.
+
+        :param sigma: Standard deviation of Gaussian noise to be added.
+        :type sigma: `float`
+        :param ratio: Percentage of data augmentation. E.g. for a ratio of 1, the size of the dataset will double.
+        :type ratio: `float`
+        """
+        # Save defence-specific parameters
+        super(GaussianAugmentation, self).set_params(**kwargs)
+
+        if self.ratio <= 0:
+            raise ValueError("The augmentation ratio must be positive.")
+
+        return True
diff --git a/art/defences/gaussian_augmentation_unittest.py b/art/defences/gaussian_augmentation_unittest.py
@@ -0,0 +1,37 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import unittest
+
+import numpy as np
+
+from art.defences.gaussian_augmentation import GaussianAugmentation
+
+
+class TestGaussianAugmentation(unittest.TestCase):
+    def test_small_size(self):
+        x = np.arange(15).reshape((5, 3))
+        ga = GaussianAugmentation()
+        new_x = ga(x, ratio=.4)
+        self.assertTrue(new_x.shape == (7, 3))
+
+    def test_double_size(self):
+        x = np.arange(12).reshape((4, 3))
+        ga = GaussianAugmentation()
+        new_x = ga(x)
+        self.assertTrue(new_x.shape[0] == 2 * x.shape[0])
+
+    def test_multiple_size(self):
+        x = np.arange(12).reshape((4, 3))
+        ga = GaussianAugmentation(ratio=3.5)
+        new_x = ga(x)
+        self.assertTrue(int(4.5 * x.shape[0]) == new_x.shape[0])
+
+    def test_labels(self):
+        x = np.arange(12).reshape((4, 3))
+        y = np.arange(8).reshape((4, 2))
+
+        ga = GaussianAugmentation()
+        new_x, new_y = ga(x, y)
+        self.assertTrue(new_x.shape[0] == new_y.shape[0] == 8)
+        self.assertTrue(new_x.shape[1:] == x.shape[1:])
+        self.assertTrue(new_y.shape[1:] == y.shape[1:])
diff --git a/art/defences/spatial_smoothing.py b/art/defences/spatial_smoothing.py
@@ -24,7 +24,7 @@ def __init__(self, window_size=3):
 
     def __call__(self, x, window_size=None):
         """
-        Apply local spatial smoothing to sample `x_val`.
+        Apply local spatial smoothing to sample `x`.
 
         :param x: Sample to smooth with shape `(batch_size, width, height, depth)`.
         :type x: `np.ndarray`
diff --git a/art/metrics.py b/art/metrics.py
@@ -201,14 +201,6 @@ def clever_t(classifier, x, target_class, n_b, n_s, r, norm, c_init=1, pool_fact
     if pool_factor < 1:
         raise ValueError("The pool_factor must be larger than 1")
 
-    # Change norm since q = p / (p-1)
-    if norm == 1:
-        norm = np.inf
-    elif norm == np.inf:
-        norm = 1
-    elif norm != 2:
-        raise ValueError("Norm {} not supported".format(norm))
-
     # Some auxiliary vars
     grad_norm_set = []
     dim = reduce(lambda x_, y: x_ * y, x.shape, 1)
@@ -220,6 +212,14 @@ def clever_t(classifier, x, target_class, n_b, n_s, r, norm, c_init=1, pool_fact
     rand_pool += np.repeat(np.array([x]), pool_factor * n_s, 0)
     np.clip(rand_pool, classifier.clip_values[0], classifier.clip_values[1], out=rand_pool)
 
+    # Change norm since q = p / (p-1)
+    if norm == 1:
+        norm = np.inf
+    elif norm == np.inf:
+        norm = 1
+    elif norm != 2:
+        raise ValueError("Norm {} not supported".format(norm))
+
     # Loop over n_b batches
     for i in range(n_b):
         # Random generation of data points
diff --git a/docs/modules/defences.rst b/docs/modules/defences.rst
@@ -25,3 +25,9 @@ Adversarial Training
 .. autoclass:: AdversarialTrainer
    :members:
    :special-members:
+
+Gaussian Data Augmentation
+--------------------------
+.. autoclass:: GaussianAugmentation
+   :members:
+   :special-members:
diff --git a/docs/modules/detection.rst b/docs/modules/detection.rst
@@ -1,5 +1,5 @@
 :mod:`art.detection`
-===================
+====================
 
 Base Class
 ----------