-from __future__ import absolute_import, division, print_function
+from __future__ import absolute_import, division, print_function, unicode_literals

-from config import config_dict
-
-from cleverhans.attacks_tf import fgm
from keras import backend as k
import numpy as np
import tensorflow as tf

class FastGradientMethod(Attack):
    """
    This attack was originally implemented by Goodfellow et al. (2015) with the infinity norm (and is known as the "Fast
-    Gradient Sign Method"). This implementation is inspired by the one in Cleverhans
-    (https://github.com/tensorflow/cleverhans) which extends the attack to other norms, and is therefore called the Fast
+    Gradient Sign Method"). This implementation extends the attack to other norms, and is therefore called the Fast
    Gradient Method. Paper link: https://arxiv.org/abs/1412.6572
    """
-    attack_params = ['ord', 'y', 'y_val', 'clip_min', 'clip_max']
+    attack_params = ['ord', 'y', 'y_val', 'targeted', 'clip_min', 'clip_max']

-    def __init__(self, classifier, sess=None, ord=np.inf, y=None, clip_min=None, clip_max=None):
+    def __init__(self, classifier, sess=None, ord=np.inf, y=None, targeted=False, clip_min=None, clip_max=None):
        """Create a FastGradientMethod instance.
-        :param ord: (optional) Order of the norm (mimics Numpy). Possible values: np.inf, 1 or 2.
+        :param ord: (optional) Order of the norm. Possible values: np.inf, 1 or 2.
        :param y: (optional) A placeholder for the model labels. Only provide this parameter if you'd like to use true
                  labels when crafting adversarial samples. Otherwise, model predictions are used as labels to avoid the
                  "label leaking" effect (explained in this paper: https://arxiv.org/abs/1611.01236). Default is None.
                  Labels should be one-hot-encoded.
+        :param targeted: (optional boolean) Should the attack target one specific class
        :param clip_min: (optional float) Minimum input component value
        :param clip_max: (optional float) Maximum input component value
        """
        super(FastGradientMethod, self).__init__(classifier, sess)

-        kwargs = {'ord': ord, 'clip_min': clip_min, 'clip_max': clip_max, 'y': y}
+        kwargs = {'ord': ord, 'targeted': targeted, 'clip_min': clip_min, 'clip_max': clip_max, 'y': y}
        self.set_params(**kwargs)

    def generate_graph(self, x, eps=0.3, **kwargs):
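
A minimal NumPy sketch of the per-sample norm handling that the next hunk adds to generate_graph; the function and variable names here are illustrative only, not part of the commit.

import numpy as np

def normalize_gradient(grad, ord=np.inf):
    # Illustrative mirror of the norm bound applied in generate_graph below:
    # reduce over every axis except the batch axis, then rescale each sample.
    axes = tuple(range(1, grad.ndim))
    if ord == np.inf:
        return np.sign(grad)
    elif ord == 1:
        return grad / np.sum(np.abs(grad), axis=axes, keepdims=True)
    elif ord == 2:
        return grad / np.sqrt(np.sum(grad ** 2, axis=axes, keepdims=True))
    raise ValueError("ord must be np.inf, 1 or 2")

# x_adv = np.clip(x + eps * normalize_gradient(grad, ord), clip_min, clip_max)
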
@@ -48,11 +45,43 @@ def generate_graph(self, x, eps=0.3, **kwargs):
        """
        self.set_params(**kwargs)

-        return fgm(x, self.classifier._get_predictions(x, log=False), y=self.y, eps=eps, ord=self.ord,
-                   clip_min=self.clip_min, clip_max=self.clip_max)
+        preds = self.classifier._get_predictions(x, log=False)
+
+        if not hasattr(self, 'y') or self.y is None:
+            # Use model predictions as correct outputs
+            preds_max = tf.reduce_max(preds, 1, keep_dims=True)
+            y = tf.to_float(tf.equal(preds, preds_max))
+            y = tf.stop_gradient(y)
+        else:
+            y = self.y
+        y = y / tf.reduce_sum(y, 1, keep_dims=True)
+
+        loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=y)
+        if self.targeted:
+            loss = -loss
+        grad, = tf.gradients(loss, x)
+
+        # Apply norm bound
+        if self.ord == np.inf:
+            grad = tf.sign(grad)
+        elif self.ord == 1:
+            ind = list(range(1, len(x.get_shape())))
+            grad = grad / tf.reduce_sum(tf.abs(grad), reduction_indices=ind, keep_dims=True)
+        elif self.ord == 2:
+            ind = list(range(1, len(x.get_shape())))
+            grad = grad / tf.sqrt(tf.reduce_sum(tf.square(grad), reduction_indices=ind, keep_dims=True))
+
+        # Apply perturbation and clip
+        x_adv_op = x + eps * grad
+        if self.clip_min is not None and self.clip_max is not None:
+            x_adv_op = tf.clip_by_value(x_adv_op, self.clip_min, self.clip_max)
+
+        return x_adv_op

    def minimal_perturbations(self, x, x_val, eps_step=0.1, eps_max=1., **kwargs):
-        """Iteratively compute the minimal perturbation necessary to make the class prediction change.
+        """Iteratively compute the minimal perturbation necessary to make the class prediction change. Stops when the
+        first adversarial example is found.
+
        :param x: (required) A placeholder for the input.
        :param x_val: (required) A Numpy array with the original inputs.
        :param eps_step: (optional float) The increase in the perturbation for each iteration
@@ -67,14 +96,14 @@ def minimal_perturbations(self, x, x_val, eps_step=0.1, eps_max=1., **kwargs):
        eps = eps_step

        while len(curr_indexes) != 0 and eps <= eps_max:
-            # adversarial crafting
+            # Adversarial crafting
            adv_x_op = self.generate_graph(x, eps=eps, **kwargs)
            adv_y = tf.argmax(self.model(adv_x_op), 1)

            feed_dict = {x: x_val[curr_indexes], k.learning_phase(): 0}
            new_adv_x, new_y = self.sess.run([adv_x_op, adv_y], feed_dict=feed_dict)

-            # update
+            # Update
            adv_x[curr_indexes] = new_adv_x
            curr_indexes = np.where(y[curr_indexes] == new_y)[0]
@@ -93,6 +122,7 @@ def generate(self, x_val, **kwargs):
                  Labels should be one-hot-encoded.
        :param clip_min: (optional float) Minimum input component value
        :param clip_max: (optional float) Maximum input component value
+        :return: A Numpy array holding the adversarial examples.
        """

        input_shape = list(x_val.shape)
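
For context, a hedged usage sketch of the interface after this change. The classifier wrapper, session setup and data (classifier, x_test) are assumptions, and the keyword arguments follow the constructor and generate_graph signatures shown above.

import numpy as np
import tensorflow as tf
from keras import backend as k

# Assumed setup: `classifier` is a model wrapper accepted by FastGradientMethod,
# and `x_test` a Numpy array of inputs scaled to [0, 1].
sess = tf.Session()
k.set_session(sess)

attack = FastGradientMethod(classifier, sess=sess, ord=np.inf, clip_min=0., clip_max=1.)
x_adv = attack.generate(x_test, eps=0.3)           # untargeted, infinity norm (classic FGSM)
x_adv_l2 = attack.generate(x_test, eps=2., ord=2)  # same attack under the L2 norm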