
Commit d164140

Irina Nicolae authored and committed
Optimize FGSM minimal perturbation
1 parent 150b389 commit d164140

File tree: 2 files changed, +15 −15 lines changed


src/attacks/fast_gradient.py

Lines changed: 13 additions & 13 deletions
@@ -31,10 +31,10 @@ def __init__(self, classifier, sess=None, ord=np.inf, y=None, targeted=False, cl
         kwargs = {'ord': ord, 'targeted': targeted, 'clip_min': clip_min, 'clip_max': clip_max, 'y': y}
         self.set_params(**kwargs)
 
-    def generate_graph(self, x, eps=0.3, **kwargs):
+    def generate_graph(self, x, eps, **kwargs):
         """Generate symbolic graph for adversarial examples and return.
         :param x: The model's symbolic inputs.
-        :param eps: (optional float) attack step size (input variation)
+        :param eps: (optional tf.placeholder) The placeholder for input variation (noise amplitude)
         :param ord: (optional) Order of the norm (mimics Numpy). Possible values: np.inf, 1 or 2.
         :param y: (optional) A placeholder for the model labels. Only provide this parameter if you'd like to use true
                   labels when crafting adversarial samples. Otherwise, model predictions are used as labels to avoid the

@@ -93,21 +93,20 @@ def minimal_perturbations(self, x, x_val, eps_step=0.1, eps_max=1., **kwargs):
         y = np.argmax(self.model.predict(x_val), 1)
         adv_x = x_val.copy()
 
-        curr_indexes = np.arange(len(x_val))
-        eps = eps_step
+        curr_indexes = np.arange(len(adv_x))
+        eps = tf.placeholder(tf.float32, None)
+        adv_x_op = self.generate_graph(x, eps, **kwargs)
+        adv_y = tf.argmax(self.model(adv_x_op), 1)
+        eps_val = eps_step
 
-        while len(curr_indexes) != 0 and eps <= eps_max:
+        while len(curr_indexes) != 0 and eps_val <= eps_max:
             # Adversarial crafting
-            adv_x_op = self.generate_graph(x, eps=eps, **kwargs)
-            adv_y = tf.argmax(self.model(adv_x_op), 1)
-
-            new_adv_x, new_y = self.sess.run([adv_x_op, adv_y], {x: x_val[curr_indexes]})
+            new_adv_x, new_y = self.sess.run([adv_x_op, adv_y], {x: x_val[curr_indexes], eps: eps_val})
 
             # Update
             adv_x[curr_indexes] = new_adv_x
             curr_indexes = np.where(y[curr_indexes] == new_y)[0]
-
-            eps += eps_step
+            eps_val += eps_step
 
         return adv_x
 

@@ -125,14 +124,15 @@ def generate(self, x_val, **kwargs):
         :return: A Numpy array holding the adversarial examples.
         """
 
-        input_shape = list(x_val.shape)
-        input_shape[0] = None
+        input_shape = [None] + list(x_val.shape[1:])
         self._x = tf.placeholder(tf.float32, shape=input_shape)
         k.set_learning_phase(0)
 
+        # Return adversarial examples computed with minimal perturbation if option is active
         if "minimal" in kwargs and kwargs["minimal"]:
             return self.minimal_perturbations(self._x, x_val, **kwargs)
 
+        # Generate computation graph
         self._x_adv = self.generate_graph(self._x, **kwargs)
 
         # Run symbolic graph without or with true labels
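The optimization: minimal_perturbations no longer rebuilds the TensorFlow graph on every epsilon increment. Instead, eps becomes a tf.placeholder, generate_graph and the prediction op are created once before the while loop, and each pass only feeds a new eps_val through the feed_dict. A minimal sketch of that pattern, assuming a toy TF 1.x graph and session rather than the project's classifier (the dense layer and max-logit "loss" below are stand-ins, not the library's code), could look like this:

import numpy as np
import tensorflow as tf

# Build the crafting graph once, with eps as a placeholder
x = tf.placeholder(tf.float32, shape=[None, 2])
eps = tf.placeholder(tf.float32, None)
logits = tf.layers.dense(x, 3)                             # stand-in classifier
loss = tf.reduce_max(logits, axis=1)                       # stand-in loss
adv_x_op = x + eps * tf.sign(tf.gradients(loss, x)[0])     # FGSM-style step, built outside the loop

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x_val = np.random.rand(4, 2).astype(np.float32)
    eps_val, eps_step, eps_max = 0.1, 0.1, 1.
    while eps_val <= eps_max:
        # Same graph on every pass; only the fed value of eps changes
        adv = sess.run(adv_x_op, {x: x_val, eps: eps_val})
        eps_val += eps_step

Building the graph once avoids adding duplicate ops to the graph at every eps value, which is what the previous version did by calling generate_graph inside the loop.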

src/attacks/fast_gradient_unittest.py

Lines changed: 2 additions & 2 deletions
@@ -83,10 +83,10 @@ def test_mnist(self):
         self.assertFalse((Y_test == test_y_pred).all())
 
         scores = classifier.evaluate(X_train_adv_min, Y_train)
-        print('\naccuracy on adversarial train examples: %.2f%%' % (scores[1] * 100))
+        print('\naccuracy on adversarial train examples with minimal perturbation: %.2f%%' % (scores[1] * 100))
 
         scores = classifier.evaluate(X_test_adv_min, Y_test)
-        print('\naccuracy on adversarial test examples: %.2f%%' % (scores[1] * 100))
+        print('\naccuracy on adversarial test examples with minimal perturbation: %.2f%%' % (scores[1] * 100))
 
     def test_with_preprocessing(self):
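The "minimal perturbation" in the updated test messages refers to the per-sample search in minimal_perturbations above: each sample keeps the adversarial version produced at the smallest eps that changed its predicted label, tracked through curr_indexes. A self-contained NumPy illustration of that bookkeeping, with stand-in predict and craft functions instead of the real classifier and FGSM gradient, might be:

import numpy as np

def predict(batch):
    # Stand-in classifier (assumption): label = index of the largest feature
    return np.argmax(batch, axis=1)

def craft(batch, eps_val):
    # Stand-in for the FGSM crafting step at a given noise amplitude
    return np.clip(batch + eps_val * np.sign(np.random.randn(*batch.shape)), 0., 1.)

x_val = np.random.rand(8, 5)
y = predict(x_val)                      # original labels
adv_x = x_val.copy()

curr_indexes = np.arange(len(adv_x))    # samples whose label has not flipped yet
eps_val, eps_step, eps_max = 0.1, 0.1, 1.

while len(curr_indexes) != 0 and eps_val <= eps_max:
    new_adv_x = craft(x_val[curr_indexes], eps_val)
    new_y = predict(new_adv_x)

    # Store the crafted samples; fooled samples drop out of curr_indexes and are
    # never overwritten, so each keeps the smallest eps that flipped its label
    adv_x[curr_indexes] = new_adv_x
    curr_indexes = curr_indexes[y[curr_indexes] == new_y]
    eps_val += eps_step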
