@@ -13,11 +13,12 @@ class FastGradientMethod(Attack):
     Gradient Sign Method"). This implementation extends the attack to other norms, and is therefore called the Fast
     Gradient Method. Paper link: https://arxiv.org/abs/1412.6572
     """
-    attack_params = ['ord', 'y', 'y_val', 'targeted', 'clip_min', 'clip_max']
+    attack_params = ['ord', 'eps', 'y', 'y_val', 'targeted', 'clip_min', 'clip_max']

-    def __init__(self, classifier, sess=None, ord=np.inf, y=None, targeted=False, clip_min=None, clip_max=None):
+    def __init__(self, classifier, sess=None, ord=np.inf, eps=.3, y=None, targeted=False, clip_min=0, clip_max=1):
         """Create a FastGradientMethod instance.
         :param ord: (optional) Order of the norm. Possible values: np.inf, 1 or 2.
+        :param eps: (required float) attack step size (input variation)
         :param y: (optional) A placeholder for the model labels. Only provide this parameter if you'd like to use true
                   labels when crafting adversarial samples. Otherwise, model predictions are used as labels to avoid the
                   "label leaking" effect (explained in this paper: https://arxiv.org/abs/1611.01236). Default is None.
@@ -28,13 +29,13 @@ def __init__(self, classifier, sess=None, ord=np.inf, y=None, targeted=False, cl
         """
         super(FastGradientMethod, self).__init__(classifier, sess)

-        kwargs = {'ord': ord, 'targeted': targeted, 'clip_min': clip_min, 'clip_max': clip_max, 'y': y}
+        kwargs = {'ord': ord, 'eps': eps, 'targeted': targeted, 'clip_min': clip_min, 'clip_max': clip_max, 'y': y}
         self.set_params(**kwargs)

-    def generate_graph(self, x, eps, **kwargs):
+    def generate_graph(self, x_op, eps_op, **kwargs):
         """Generate symbolic graph for adversarial examples and return.
-        :param x: The model's symbolic inputs.
-        :param eps: (optional tf.placeholder) The placeholder for input variation (noise amplitude)
+        :param x_op: The model's symbolic inputs.
+        :param eps_op: (optional tf.placeholder) The placeholder for input variation (noise amplitude)
         :param ord: (optional) Order of the norm (mimics Numpy). Possible values: np.inf, 1 or 2.
         :param y: (optional) A placeholder for the model labels. Only provide this parameter if you'd like to use true
                   labels when crafting adversarial samples. Otherwise, model predictions are used as labels to avoid the
@@ -45,7 +46,7 @@ def generate_graph(self, x, eps, **kwargs):
         """
         self.set_params(**kwargs)

-        preds = self.classifier._get_predictions(x, log=False)
+        preds = self.classifier._get_predictions(x_op, log=False)

         if not hasattr(self, 'y') or self.y is None:
             # Use model predictions as correct outputs
@@ -59,20 +60,20 @@ def generate_graph(self, x, eps, **kwargs):
         loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=y)
         if self.targeted:
             loss = -loss
-        grad, = tf.gradients(loss, x)
+        grad, = tf.gradients(loss, x_op)

         # Apply norm bound
         if self.ord == np.inf:
             grad = tf.sign(grad)
         elif self.ord == 1:
-            ind = list(range(1, len(x.get_shape())))
+            ind = list(range(1, len(x_op.get_shape())))
             grad = grad / tf.reduce_sum(tf.abs(grad), reduction_indices=ind, keep_dims=True)
         elif self.ord == 2:
-            ind = list(range(1, len(x.get_shape())))
+            ind = list(range(1, len(x_op.get_shape())))
             grad = grad / tf.sqrt(tf.reduce_sum(tf.square(grad), reduction_indices=ind, keep_dims=True))

         # Apply perturbation and clip
-        x_adv_op = x + eps * grad
+        x_adv_op = x_op + eps_op * grad
         if self.clip_min is not None and self.clip_max is not None:
             x_adv_op = tf.clip_by_value(x_adv_op, self.clip_min, self.clip_max)
@@ -133,16 +134,17 @@ def generate(self, x_val, **kwargs):
             return self.minimal_perturbations(self._x, x_val, **kwargs)

         # Generate computation graph
-        self._x_adv = self.generate_graph(self._x, **kwargs)
+        eps = tf.placeholder(tf.float32, None)
+        self._x_adv = self.generate_graph(self._x, eps, **kwargs)

         # Run symbolic graph without or with true labels
         if 'y_val' not in kwargs or kwargs['y_val'] is None:
-            feed_dict = {self._x: x_val}
+            feed_dict = {self._x: x_val, eps: self.eps}
         else:
             # Verify label placeholder was given in params if using true labels
             if self.y is None:
                 raise Exception("True labels given but label placeholder not given.")
-            feed_dict = {self._x: x_val, self.y: kwargs['y_val']}
+            feed_dict = {self._x: x_val, self.y: kwargs['y_val'], eps: self.eps}

         return self.sess.run(self._x_adv, feed_dict=feed_dict)

@@ -152,6 +154,7 @@ def set_params(self, **kwargs):

         Attack-specific parameters:
         :param ord: (optional) Order of the norm (mimics Numpy). Possible values: np.inf, 1 or 2.
+        :param eps: (required float) attack step size (input variation)
         :param y: (optional) A placeholder for the model labels. Only provide this parameter if you'd like to use true
                   labels when crafting adversarial samples. Otherwise, model predictions are used as labels to avoid the
                   "label leaking" effect (explained in this paper: https://arxiv.org/abs/1611.01236). Default is None.
@@ -166,4 +169,7 @@ def set_params(self, **kwargs):
         if self.ord not in [np.inf, int(1), int(2)]:
             raise ValueError("Norm order must be either np.inf, 1, or 2.")

+        if self.eps <= self.clip_min or self.eps > self.clip_max:
+            raise ValueError('The amount of perturbation has to be in the data range.')
+
         return True
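Taken together, the new signature can be exercised roughly as below. This is a sketch only: it assumes an existing TensorFlow session `sess`, a wrapped `classifier`, and an input batch `x_val`, none of which are defined in this diff.

fgm = FastGradientMethod(classifier, sess=sess, ord=np.inf, eps=0.1, clip_min=0, clip_max=1)
x_adv = fgm.generate(x_val)   # eps is fed through the tf.placeholder created inside generate()

fgm.set_params(eps=0.25)      # accepted: lies within (clip_min, clip_max]
# fgm.set_params(eps=1.5)     # would raise ValueError under the new range check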