
Commit a031768

Irina Nicolae authored and committed
Merge batching (close #41)
2 parents a34a140 + 7eb6a8e commit a031768

8 files changed: +91 −70 lines


art/attacks/fast_gradient.py

Lines changed: 17 additions & 8 deletions
@@ -12,9 +12,9 @@ class FastGradientMethod(Attack):
     Gradient Sign Method"). This implementation extends the attack to other norms, and is therefore called the Fast
     Gradient Method. Paper link: https://arxiv.org/abs/1412.6572
     """
-    attack_params = Attack.attack_params + ['norm', 'eps', 'targeted', 'random_init']
+    attack_params = Attack.attack_params + ['norm', 'eps', 'targeted', 'random_init', 'batch_size']

-    def __init__(self, classifier, norm=np.inf, eps=.3, targeted=False, random_init=False):
+    def __init__(self, classifier, norm=np.inf, eps=.3, targeted=False, random_init=False, batch_size=128):
         """
         Create a :class:`FastGradientMethod` instance.

@@ -28,13 +28,16 @@ def __init__(self, classifier, norm=np.inf, eps=.3, targeted=False, random_init=
         :type targeted: `bool`
         :param random_init: Whether to start at the original input or a random point within the epsilon ball
         :type random_init: `bool`
+        :param batch_size: Batch size
+        :type batch_size: `int`
         """
         super(FastGradientMethod, self).__init__(classifier)

         self.norm = norm
         self.eps = eps
         self.targeted = targeted
         self.random_init = random_init
+        self.batch_size = batch_size

     def _minimal_perturbation(self, x, y, eps_step=0.1, eps_max=1., **kwargs):
         """Iteratively compute the minimal perturbation necessary to make the class prediction change. Stop when the
@@ -55,9 +58,8 @@ def _minimal_perturbation(self, x, y, eps_step=0.1, eps_max=1., **kwargs):
         adv_x = x.copy()

         # Compute perturbation with implicit batching
-        batch_size = 128
-        for batch_id in range(adv_x.shape[0] // batch_size + 1):
-            batch_index_1, batch_index_2 = batch_id * batch_size, min((batch_id + 1) * batch_size, x.shape[0])
+        for batch_id in range(int(np.ceil(adv_x.shape[0] / float(self.batch_size)))):
+            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
             batch = adv_x[batch_index_1:batch_index_2]
             batch_labels = y[batch_index_1:batch_index_2]

@@ -101,6 +103,8 @@ def generate(self, x, **kwargs):
         :type minimal: `bool`
         :param random_init: Whether to start at the original input or a random point within the epsilon ball
         :type random_init: `bool`
+        :param batch_size: Batch size
+        :type batch_size: `int`
         :return: An array holding the adversarial examples.
         :rtype: `np.ndarray`
         """
@@ -134,6 +138,8 @@ def set_params(self, **kwargs):
         :type eps: `float`
         :param targeted: Should the attack target one specific class
         :type targeted: `bool`
+        :param batch_size: Batch size
+        :type batch_size: `int`
         """
         # Save attack-specific parameters
         super(FastGradientMethod, self).set_params(**kwargs)
@@ -144,6 +150,10 @@ def set_params(self, **kwargs):

         if self.eps <= 0:
             raise ValueError('The perturbation size `eps` has to be positive.')
+
+        if self.batch_size <= 0:
+            raise ValueError('The batch size `batch_size` has to be positive.')
+
         return True

     def _compute_perturbation(self, batch, batch_labels):
@@ -179,9 +189,8 @@ def _compute(self, x, y, eps, random_init):
         adv_x = x.copy()

         # Compute perturbation with implicit batching
-        batch_size = 128
-        for batch_id in range(adv_x.shape[0] // batch_size + 1):
-            batch_index_1, batch_index_2 = batch_id * batch_size, (batch_id + 1) * batch_size
+        for batch_id in range(int(np.ceil(adv_x.shape[0] / float(self.batch_size)))):
+            batch_index_1, batch_index_2 = batch_id * self.batch_size, (batch_id + 1) * self.batch_size
             batch = adv_x[batch_index_1:batch_index_2]
             batch_labels = y[batch_index_1:batch_index_2]

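The heart of this change is the indexing scheme used for implicit batching: the hard-coded batch_size = 128 becomes the new self.batch_size attribute, the number of batches comes from a ceiling division, and the final partial batch is handled by NumPy's slice clipping rather than an explicit min(). A minimal, standalone sketch of that logic (the array contents and batch size below are illustrative, not taken from this commit):

import numpy as np

x = np.arange(10).reshape(10, 1)  # 10 samples; the batch size does not divide evenly
batch_size = 4

# Ceiling division: 3 batches of sizes 4, 4 and 2
num_batches = int(np.ceil(x.shape[0] / float(batch_size)))

for batch_id in range(num_batches):
    begin, end = batch_id * batch_size, (batch_id + 1) * batch_size
    batch = x[begin:end]  # NumPy clips the out-of-range end index on the last batch
    print(batch_id, batch.shape)  # -> (4, 1), (4, 1), (2, 1)

Unlike the previous adv_x.shape[0] // batch_size + 1 formula, the ceiling form does not produce an empty trailing batch when the number of samples is an exact multiple of the batch size.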

art/attacks/fast_gradient_unittest.py

Lines changed: 2 additions & 2 deletions
@@ -105,8 +105,8 @@ def _test_backend_mnist(self, classifier):

         # Test FGSM with np.inf norm
         attack = FastGradientMethod(classifier, eps=1)
-        x_test_adv = attack.generate(x_test)
-        x_train_adv = attack.generate(x_train)
+        x_test_adv = attack.generate(x_test, **{'batch_size': 2})
+        x_train_adv = attack.generate(x_train, **{'batch_size': 4})

         self.assertFalse((x_train == x_train_adv).all())
         self.assertFalse((x_test == x_test_adv).all())

art/attacks/universal_perturbation_unittest.py

Lines changed: 4 additions & 4 deletions
@@ -14,7 +14,7 @@
 from art.classifiers import KerasClassifier, PyTorchClassifier, TFClassifier
 from art.utils import load_mnist

-BATCH_SIZE, NB_TRAIN, NB_TEST = 100, 1000, 10
+BATCH_SIZE, NB_TRAIN, NB_TEST = 100, 500, 10


 class Model(nn.Module):
@@ -82,7 +82,7 @@ def test_tfclassifier(self):

         # Attack
         # TODO Launch with all possible attacks
-        attack_params = {"attacker": "newtonfool", "attacker_params": {"max_iter": 20}}
+        attack_params = {"attacker": "newtonfool", "attacker_params": {"max_iter": 5}}
         up = UniversalPerturbation(tfc)
         x_train_adv = up.generate(x_train, **attack_params)
         self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)
@@ -123,7 +123,7 @@ def test_krclassifier(self):

         # Attack
         # TODO Launch with all possible attacks
-        attack_params = {"attacker": "newtonfool", "attacker_params": {"max_iter": 20}}
+        attack_params = {"attacker": "newtonfool", "attacker_params": {"max_iter": 5}}
         up = UniversalPerturbation(krc)
         x_train_adv = up.generate(x_train, **attack_params)
         self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)
@@ -160,7 +160,7 @@ def test_ptclassifier(self):

         # Attack
         # TODO Launch with all possible attacks
-        attack_params = {"attacker": "newtonfool", "attacker_params": {"max_iter": 20}}
+        attack_params = {"attacker": "newtonfool", "attacker_params": {"max_iter": 5}}
         up = UniversalPerturbation(ptc)
         x_train_adv = up.generate(x_train, **attack_params)
         self.assertTrue((up.fooling_rate >= 0.2) or not up.converged)

art/classifiers/classifier.py

Lines changed: 3 additions & 1 deletion
@@ -44,14 +44,16 @@ def __init__(self, clip_values, channel_index, defences=None, preprocessing=(0,
         self._preprocessing = preprocessing

     @abc.abstractmethod
-    def predict(self, x, logits=False):
+    def predict(self, x, logits=False, batch_size=128):
         """
         Perform prediction for a batch of inputs.

         :param x: Test set.
         :type x: `np.ndarray`
         :param logits: `True` if the prediction should be done at the logits layer.
         :type logits: `bool`
+        :param batch_size: Size of batches.
+        :type batch_size: `int`
         :return: Array of predictions of shape `(nb_inputs, self.nb_classes)`.
         :rtype: `np.ndarray`
         """

art/classifiers/keras.py

Lines changed: 4 additions & 3 deletions
@@ -146,14 +146,16 @@ def class_gradient(self, x, label=None, logits=False):

         return grads

-    def predict(self, x, logits=False):
+    def predict(self, x, logits=False, batch_size=128):
         """
         Perform prediction for a batch of inputs.

         :param x: Test set.
         :type x: `np.ndarray`
         :param logits: `True` if the prediction should be done at the logits layer.
         :type logits: `bool`
+        :param batch_size: Size of batches.
+        :type batch_size: `int`
         :return: Array of predictions of shape `(nb_inputs, self.nb_classes)`.
         :rtype: `np.ndarray`
         """
@@ -165,9 +167,8 @@ def predict(self, x, logits=False):
         x_ = self._apply_defences_predict(x_)

         # Run predictions with batching
-        batch_size = 512
         preds = np.zeros((x_.shape[0], self.nb_classes), dtype=np.float32)
-        for b in range(x_.shape[0] // batch_size + 1):
+        for b in range(int(np.ceil(x_.shape[0] / float(batch_size)))):
             begin, end = b * batch_size, min((b + 1) * batch_size, x_.shape[0])
             preds[begin:end] = self._preds([x_[begin:end]])[0]

art/classifiers/mxnet.py

Lines changed: 19 additions & 18 deletions
@@ -99,14 +99,16 @@ def fit(self, x, y, batch_size=128, nb_epochs=20):
                 # Update parameters
                 self._optimizer.step(batch_size)

-    def predict(self, x, logits=False):
+    def predict(self, x, logits=False, batch_size=128):
         """
         Perform prediction for a batch of inputs.

         :param x: Test set.
         :type x: `np.ndarray`
         :param logits: `True` if the prediction should be done at the logits layer.
         :type logits: `bool`
+        :param batch_size: Size of batches.
+        :type batch_size: `int`
         :return: Array of predictions of shape `(nb_inputs, self.nb_classes)`.
         :rtype: `np.ndarray`
         """
@@ -116,26 +118,25 @@ def predict(self, x, logits=False):
         x_ = self._apply_processing(x)
         x_ = self._apply_defences_predict(x_)

-        # Predict
-        # TODO add batching?
-        x_ = nd.array(x_, ctx=self._ctx)
-        x_.attach_grad()
-        with autograd.record(train_mode=False):
-            preds = self._model(x_)
+        # Run prediction with batch processing
+        results = np.zeros((x_.shape[0], self.nb_classes), dtype=np.float32)
+        num_batch = int(np.ceil(len(x_) / float(batch_size)))
+        for m in range(num_batch):
+            # Batch indexes
+            begin, end = m * batch_size, min((m + 1) * batch_size, x_.shape[0])
+
+            # Predict
+            x_batch = nd.array(x_[begin:end], ctx=self._ctx)
+            x_batch.attach_grad()
+            with autograd.record(train_mode=False):
+                preds = self._model(x_batch)

-        if logits is True:
-            preds = preds.softmax()
+            if logits is False:
+                preds = preds.softmax()

-        # preds = np.empty((x.shape[0], self.nb_classes), dtype=float)
-        # pred_iter = mx.io.NDArrayIter(data=x_, batch_size=128)
-        # if logits is True:
-        #     for preds_i, i, batch in mod.iter_predict(pred_iter):
-        #         pred_label = preds_i[0].asnumpy()
-        # else:
-        #     for preds_i, i, batch in mod.iter_predict(pred_iter):
-        #         pred_label = preds_i[0].softmax().asnumpy()
+            results[begin:end] = preds.asnumpy()

-        return preds.asnumpy()
+        return results

     def class_gradient(self, x, label=None, logits=False):
         """

art/classifiers/pytorch.py

Lines changed: 20 additions & 15 deletions
@@ -59,14 +59,16 @@ def __init__(self, clip_values, model, loss, optimizer, input_shape, nb_classes,
         self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         self._model.to(self._device)

-    def predict(self, x, logits=False):
+    def predict(self, x, logits=False, batch_size=128):
         """
         Perform prediction for a batch of inputs.

         :param x: Test set.
         :type x: `np.ndarray`
         :param logits: `True` if the prediction should be done at the logits layer.
         :type logits: `bool`
+        :param batch_size: Size of batches.
+        :type batch_size: `int`
         :return: Array of predictions of shape `(nb_inputs, self.nb_classes)`.
         :rtype: `np.ndarray`
         """
@@ -85,15 +87,22 @@ def predict(self, x, logits=False):
         # exp = np.exp(preds - np.max(preds, axis=1, keepdims=True))
         # preds = exp / np.sum(exp, axis=1, keepdims=True)

-        model_outputs = self._model(torch.from_numpy(x_).to(self._device).float())
-        (logit_output, output) = (model_outputs[-2], model_outputs[-1])
+        # Run prediction with batch processing
+        results = np.zeros((x_.shape[0], self.nb_classes), dtype=np.float32)
+        num_batch = int(np.ceil(len(x_) / float(batch_size)))
+        for m in range(num_batch):
+            # Batch indexes
+            begin, end = m * batch_size, min((m + 1) * batch_size, x_.shape[0])

-        if logits:
-            preds = logit_output.detach().cpu().numpy()
-        else:
-            preds = output.detach().cpu().numpy()
+            model_outputs = self._model(torch.from_numpy(x_[begin:end]).to(self._device).float())
+            (logit_output, output) = (model_outputs[-2], model_outputs[-1])
+
+            if logits:
+                results[begin:end] = logit_output.detach().cpu().numpy()
+            else:
+                results[begin:end] = output.detach().cpu().numpy()

-        return preds
+        return results

     def fit(self, x, y, batch_size=128, nb_epochs=10):
         """
@@ -119,7 +128,7 @@ def fit(self, x, y, batch_size=128, nb_epochs=10):
         # Set train phase
         self._model.train(True)

-        num_batch = int(np.ceil(len(x_) / batch_size))
+        num_batch = int(np.ceil(len(x_) / float(batch_size)))
         ind = np.arange(len(x_))

         # Start training
@@ -129,12 +138,8 @@ def fit(self, x, y, batch_size=128, nb_epochs=10):

             # Train for one epoch
             for m in range(num_batch):
-                if m < num_batch - 1:
-                    i_batch = torch.from_numpy(x_[ind[m * batch_size:(m + 1) * batch_size]]).to(self._device)
-                    o_batch = torch.from_numpy(y_[ind[m * batch_size:(m + 1) * batch_size]]).to(self._device)
-                else:
-                    i_batch = torch.from_numpy(x_[ind[m * batch_size:]]).to(self._device)
-                    o_batch = torch.from_numpy(y_[ind[m * batch_size:]]).to(self._device)
+                i_batch = torch.from_numpy(x_[ind[m * batch_size:(m + 1) * batch_size]]).to(self._device)
+                o_batch = torch.from_numpy(y_[ind[m * batch_size:(m + 1) * batch_size]]).to(self._device)

                 # Cast to float
                 i_batch = i_batch.float()

art/classifiers/tensorflow.py

Lines changed: 22 additions & 19 deletions
@@ -75,33 +75,40 @@ def __init__(self, clip_values, input_ph, logits, output_ph=None, train=None, lo
         if self._loss is not None:
             self._loss_grads = tf.gradients(self._loss, self._input_ph)[0]

-    def predict(self, x, logits=False):
+    def predict(self, x, logits=False, batch_size=128):
         """
         Perform prediction for a batch of inputs.

         :param x: Test set.
         :type x: `np.ndarray`
         :param logits: `True` if the prediction should be done at the logits layer.
         :type logits: `bool`
+        :param batch_size: Size of batches.
+        :type batch_size: `int`
         :return: Array of predictions of shape `(nb_inputs, self.nb_classes)`.
         :rtype: `np.ndarray`
         """
-        import tensorflow as tf
-
         # Apply defences
         x_ = self._apply_processing(x)
         x_ = self._apply_defences_predict(x_)

-        # Create feed_dict
-        fd = {self._input_ph: x_}
-        if self._learning is not None:
-            fd[self._learning] = False
+        # Run prediction with batch processing
+        results = np.zeros((x_.shape[0], self.nb_classes), dtype=np.float32)
+        num_batch = int(np.ceil(len(x_) / float(batch_size)))
+        for m in range(num_batch):
+            # Batch indexes
+            begin, end = m * batch_size, min((m + 1) * batch_size, x_.shape[0])

-        # Run prediction
-        if logits:
-            results = self._sess.run(self._logits, feed_dict=fd)
-        else:
-            results = self._sess.run(self._probs, feed_dict=fd)
+            # Create feed_dict
+            fd = {self._input_ph: x_[begin:end]}
+            if self._learning is not None:
+                fd[self._learning] = False
+
+            # Run prediction
+            if logits:
+                results[begin:end] = self._sess.run(self._logits, feed_dict=fd)
+            else:
+                results[begin:end] = self._sess.run(self._probs, feed_dict=fd)

         return results

@@ -127,7 +134,7 @@ def fit(self, x, y, batch_size=128, nb_epochs=10):
         x_ = self._apply_processing(x)
         x_, y_ = self._apply_defences_fit(x_, y)

-        num_batch = int(np.ceil(len(x_) / batch_size))
+        num_batch = int(np.ceil(len(x_) / float(batch_size)))
         ind = np.arange(len(x_))

         # Start training
@@ -137,12 +144,8 @@ def fit(self, x, y, batch_size=128, nb_epochs=10):

             # Train for one epoch
             for m in range(num_batch):
-                if m < num_batch - 1:
-                    i_batch = x_[ind[m * batch_size:(m + 1) * batch_size]]
-                    o_batch = y_[ind[m * batch_size:(m + 1) * batch_size]]
-                else:
-                    i_batch = x_[ind[m * batch_size:]]
-                    o_batch = y_[ind[m * batch_size:]]
+                i_batch = x_[ind[m * batch_size:(m + 1) * batch_size]]
+                o_batch = y_[ind[m * batch_size:(m + 1) * batch_size]]

                 # Run train step
                 if self._learning is None:
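The fit() simplification in both the PyTorch and TensorFlow backends relies on the same clipping behaviour as the attack loop: since ind is a NumPy index array, the slice ind[m * batch_size:(m + 1) * batch_size] is simply shorter on the last iteration, so fancy indexing returns a correctly sized final batch and the explicit if m < num_batch - 1 branch becomes unnecessary. A short sketch of that behaviour (shapes and batch size are illustrative):

import numpy as np

x_ = np.random.rand(10, 3).astype(np.float32)
batch_size = 4
num_batch = int(np.ceil(len(x_) / float(batch_size)))

ind = np.arange(len(x_))
np.random.shuffle(ind)  # optional here; a shuffled index array slices the same way

for m in range(num_batch):
    i_batch = x_[ind[m * batch_size:(m + 1) * batch_size]]  # last slice holds only 2 indices
    print(m, i_batch.shape)  # -> (4, 3), (4, 3), (2, 3)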
