1414
1515class CarliniL2Method (Attack ):
1616 """
17- The L_2 optimized attack of Carlini and Wagner (2016). This attack is the most efficient and should be used as the
18- primary attack to evaluate potential defences (wrt the L_0 and L_inf attacks). This implementation is inspired by
19- the one in Cleverhans, which reproduces the authors' original code (https://github.com/carlini/nn_robust_attacks).
20- Paper link: https://arxiv.org/pdf/1608.04644.pdf
17+ The L_2 optimized attack of Carlini and Wagner (2016). This attack is among the most effective and should be used
18+ among the primary attacks to evaluate potential defences. A major difference with respect to the original implementation
19+ (https://github.com/carlini/nn_robust_attacks) is that we use line search in the optimization of the attack
20+ objective. Paper link: https://arxiv.org/pdf/1608.04644.pdf
2121 """
2222 attack_params = Attack .attack_params + ['confidence' , 'targeted' , 'learning_rate' , 'max_iter' ,
2323 'binary_search_steps' , 'initial_const' , 'max_halving' , 'max_doubling' ]
@@ -193,7 +193,7 @@ def generate(self, x, **kwargs):
193193 :return: An array holding the adversarial examples.
194194 :rtype: `np.ndarray`
195195 """
196- x_adv = x .copy (). astype (NUMPY_DTYPE )
196+ x_adv = x .astype (NUMPY_DTYPE )
197197 (clip_min , clip_max ) = self .classifier .clip_values
198198
199199 # Parse and save attack-specific parameters
@@ -209,7 +209,8 @@ def generate(self, x, **kwargs):
209209 if y is None :
210210 y = get_labels_np_array (self .classifier .predict (x , logits = False ))
211211
212- for j , (ex , target ) in enumerate (zip (x_adv , y )):
212+ for j , (ex , target ) in enumerate (zip (x_adv , y )):
213+ logger .debug ('Processing sample %i out of %i' , j , x_adv .shape [0 ])
213214 image = ex .copy ()
214215
215216 # The optimization is performed in tanh space to keep the
@@ -223,62 +224,98 @@ def generate(self, x, **kwargs):
223224
224225 # Initialize placeholders for best l2 distance and attack found so far
225226 best_l2dist = sys .float_info .max
226- best_adv_image = image
227- lr = self .learning_rate
227+ best_adv_image = image
228228
229- for _ in range (self .binary_search_steps ):
229+ for bss in range (self .binary_search_steps ):
230+ lr = self .learning_rate
231+ logger .debug ('Binary search step %i out of %i (c==%f)' , bss , self .binary_search_steps , c )
230232
231233 # Initialize perturbation in tanh space:
232234 adv_image = image
233235 adv_image_tanh = image_tanh
234236 z , l2dist , loss = self ._loss (image , adv_image , target , c )
235237 attack_success = (loss - l2dist <= 0 )
238+ overall_attack_success = attack_success
236239
237- for it in range (self .max_iter ):
240+ for it in range (self .max_iter ):
241+ logger .debug ('Iteration step %i out of %i' , it , self .max_iter )
242+ logger .debug ('Total Loss: %f' , loss )
243+ logger .debug ('L2Dist: %f' , l2dist )
244+ logger .debug ('Margin Loss: %f' , loss - l2dist )
245+
238246 if attack_success :
239- break
247+ logger .debug ('Margin Loss <= 0 --> Attack Success!' )
248+ if l2dist < best_l2dist :
249+ logger .debug ('New best L2Dist: %f (previous=%f)' , l2dist , best_l2dist )
250+ best_l2dist = l2dist
251+ best_adv_image = adv_image
240252
241253 # compute gradient:
254+ logger .debug ('Compute loss gradient' )
242255 perturbation_tanh = - self ._loss_gradient (z , target , image , adv_image , adv_image_tanh ,
243256 c , clip_min , clip_max )
244257
245258 # perform line search to optimize perturbation
246259 # first, halve the learning rate until perturbation actually decreases the loss:
247260 prev_loss = loss
261+ best_loss = loss
262+ best_lr = 0
263+
248264 halving = 0
249- while loss >= prev_loss and loss - l2dist > 0 and halving < self .max_halving :
265+ while loss >= prev_loss and halving < self .max_halving :
266+ logger .debug ('Apply gradient with learning rate %f (halving=%i)' , lr , halving )
250267 new_adv_image_tanh = adv_image_tanh + lr * perturbation_tanh
251268 new_adv_image = self ._tanh_to_original (new_adv_image_tanh , clip_min , clip_max )
252- _ , l2dist , loss = self ._loss (image , new_adv_image , target , c )
269+ _ , l2dist , loss = self ._loss (image , new_adv_image , target , c )
270+ logger .debug ('New Total Loss: %f' , loss )
271+ logger .debug ('New L2Dist: %f' , l2dist )
272+ logger .debug ('New Margin Loss: %f' , loss - l2dist )
273+ if loss < best_loss :
274+ best_loss = loss
275+ best_lr = lr
253276 lr /= 2
254277 halving += 1
255278 lr *= 2
256279
257280 # if no halving was actually required, double the learning rate as long as this
258281 # decreases the loss:
259- if halving == 1 :
282+ if halving == 1 and loss <= prev_loss :
260283 doubling = 0
261284 while loss <= prev_loss and doubling < self .max_doubling :
262285 prev_loss = loss
263286 lr *= 2
287+ logger .debug ('Apply gradient with learning rate %f (doubling=%i)' , lr , doubling )
264288 doubling += 1
265289 new_adv_image_tanh = adv_image_tanh + lr * perturbation_tanh
266290 new_adv_image = self ._tanh_to_original (new_adv_image_tanh , clip_min , clip_max )
267- _ , l2dist , loss = self ._loss (image , new_adv_image , target , c )
291+ _ , l2dist , loss = self ._loss (image , new_adv_image , target , c )
292+ logger .debug ('New Total Loss: %f' , loss )
293+ logger .debug ('New L2Dist: %f' , l2dist )
294+ logger .debug ('New Margin Loss: %f' , loss - l2dist )
295+ if loss < best_loss :
296+ best_loss = loss
297+ best_lr = lr
268298 lr /= 2
269299
270- # apply the optimal learning rate that was found and update the loss:
271- adv_image_tanh = adv_image_tanh + lr * perturbation_tanh
272- adv_image = self ._tanh_to_original (adv_image_tanh , clip_min , clip_max )
300+ if best_lr > 0 :
301+ logger .debug ('Finally apply gradient with learning rate %f' , best_lr )
302+ # apply the optimal learning rate that was found and update the loss:
303+ adv_image_tanh = adv_image_tanh + best_lr * perturbation_tanh
304+ adv_image = self ._tanh_to_original (adv_image_tanh , clip_min , clip_max )
305+
273306 z , l2dist , loss = self ._loss (image , adv_image , target , c )
274307 attack_success = (loss - l2dist <= 0 )
308+ overall_attack_success = overall_attack_success or attack_success
275309
276310 # Update depending on attack success:
277311 if attack_success :
312+ logger .debug ('Margin Loss <= 0 --> Attack Success!' )
278313 if l2dist < best_l2dist :
314+ logger .debug ('New best L2Dist: %f (previous=%f)' , l2dist , best_l2dist )
279315 best_l2dist = l2dist
280316 best_adv_image = adv_image
281-
317+
318+ if overall_attack_success :
282319 c_double = False
283320 c = (c_lower_bound + c ) / 2
284321 else :
@@ -301,7 +338,7 @@ def generate(self, x, **kwargs):
301338 else :
302339 preds = np .argmax (self .classifier .predict (x ), axis = 1 )
303340 rate = np .sum (adv_preds != preds ) / x_adv .shape [0 ]
304- logger .info ('Success rate of C&W attack: %.2f%%' , rate )
341+ logger .info ('Success rate of C&W attack: %.2f%%' , 100 * rate )
305342
306343 return x_adv
307344
0 commit comments