
 class CarliniL2Method(Attack):
     """
-    The L_2 optimized attack of Carlini and Wagner (2016). This attack is the most efficient and should be used as the
-    primary attack to evaluate potential defences (wrt the L_0 and L_inf attacks). This implementation is inspired by
-    the one in Cleverhans, which reproduces the authors' original code (https://github.com/carlini/nn_robust_attacks).
-    Paper link: https://arxiv.org/pdf/1608.04644.pdf
+    The L_2 optimized attack of Carlini and Wagner (2016). This attack is among the most effective and should be used
+    as one of the primary attacks to evaluate potential defences. A major difference wrt the original implementation
+    (https://github.com/carlini/nn_robust_attacks) is that we use line search in the optimization of the attack
+    objective. Paper link: https://arxiv.org/pdf/1608.04644.pdf
2121 """
2222 attack_params = Attack .attack_params + ['confidence' , 'targeted' , 'learning_rate' , 'max_iter' ,
2323 'binary_search_steps' , 'initial_const' , 'max_halving' , 'max_doubling' ]
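
For reference, a minimal usage sketch (not part of the diff; the import path, the classifier argument, and the keyword values shown are assumptions based only on the attack_params list above):

    from art.attacks import CarliniL2Method  # assumed import path

    # 'classifier' is assumed to be a fitted ART-style classifier wrapper.
    attack = CarliniL2Method(classifier, confidence=0.0, targeted=False, learning_rate=0.01,
                             max_iter=10, binary_search_steps=10, initial_const=0.01,
                             max_halving=5, max_doubling=5)
    x_adv = attack.generate(x)  # x: array of inputs to perturb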
@@ -209,8 +209,9 @@ def generate(self, x, **kwargs):
         if y is None:
             y = get_labels_np_array(self.classifier.predict(x, logits=False))

-        for j, (ex, target) in enumerate(zip(x_adv, y)):
-            image = ex.copy()
+        for j, (ex, target) in enumerate(zip(x_adv, y)):
+            logger.debug('Processing sample %i out of %i' % (j, x_adv.shape[0]))
+            image = ex.copy().astype(NUMPY_DTYPE)

             # The optimization is performed in tanh space to keep the
             # adversarial images bounded from clip_min and clip_max.
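
The change of variables referred to here is the standard one from the C&W paper: an unconstrained tanh-space variable is mapped into the valid input range, so the optimizer never has to clip. A sketch of what the `_tanh_to_original` helper and its inverse presumably compute (the smoothing factor is an assumption, used to keep arctanh finite at the boundaries):

    import numpy as np

    def tanh_to_original(x_tanh, clip_min, clip_max):
        # tanh(.) lies in (-1, 1), so the result lies strictly inside [clip_min, clip_max].
        return (np.tanh(x_tanh) + 1.0) / 2.0 * (clip_max - clip_min) + clip_min

    def original_to_tanh(x, clip_min, clip_max, tanh_smoother=0.999999):
        # Inverse mapping; scaling by a factor slightly below 1 avoids arctanh(+/-1) = inf.
        x_norm = 2.0 * (x - clip_min) / (clip_max - clip_min) - 1.0
        return np.arctanh(x_norm * tanh_smoother)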
@@ -223,22 +224,29 @@ def generate(self, x, **kwargs):

             # Initialize placeholders for best l2 distance and attack found so far
             best_l2dist = sys.float_info.max
-            best_adv_image = image
-            lr = self.learning_rate
+            best_adv_image = image

-            for _ in range(self.binary_search_steps):
+            for bss in range(self.binary_search_steps):
+                lr = self.learning_rate
+                logger.debug('Binary search step %i out of %i (c==%f)' % (bss, self.binary_search_steps, c))

                 # Initialize perturbation in tanh space:
                 adv_image = image
                 adv_image_tanh = image_tanh
                 z, l2dist, loss = self._loss(image, adv_image, target, c)
                 attack_success = (loss - l2dist <= 0)

-                for it in range(self.max_iter):
+                for it in range(self.max_iter):
+                    logger.debug('Iteration step %i out of %i' % (it, self.max_iter))
+                    logger.debug('Total Loss: %f', loss)
+                    logger.debug('L2Dist: %f', l2dist)
+                    logger.debug('Margin Loss: %f', loss - l2dist)
+
                     if attack_success:
                         break

                     # compute gradient:
+                    logger.debug('Compute loss gradient')
                     perturbation_tanh = -self._loss_gradient(z, target, image, adv_image, adv_image_tanh,
                                                              c, clip_min, clip_max)

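The success test `attack_success = (loss - l2dist <= 0)` relies on the total loss decomposing into the squared L_2 distance plus a non-negative, weighted margin term, as in the paper's objective. Roughly, for the targeted case (all names here are illustrative, not taken from the file):

    import numpy as np

    def cw_loss(l2dist, z, target_class, c, confidence=0.0):
        # total = ||x_adv - x||_2^2 + c * max(max_{i != t} Z_i - Z_t + confidence, 0)
        # The margin term is >= 0 and reaches 0 exactly when the target class wins
        # by at least 'confidence', so loss - l2dist <= 0 signals attack success.
        z_target = z[target_class]
        z_other = np.max(np.delete(z, target_class))
        return l2dist + c * max(z_other - z_target + confidence, 0.0)
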
@@ -247,9 +255,13 @@ def generate(self, x, **kwargs):
                     prev_loss = loss
                     halving = 0
                     while loss >= prev_loss and loss - l2dist > 0 and halving < self.max_halving:
+                        logger.debug('Apply gradient with learning rate %f (halving=%i)' % (lr, halving))
                         new_adv_image_tanh = adv_image_tanh + lr * perturbation_tanh
                         new_adv_image = self._tanh_to_original(new_adv_image_tanh, clip_min, clip_max)
-                        _, l2dist, loss = self._loss(image, new_adv_image, target, c)
+                        _, l2dist, loss = self._loss(image, new_adv_image, target, c)
+                        logger.debug('New Total Loss: %f', loss)
+                        logger.debug('New L2Dist: %f', l2dist)
+                        logger.debug('New Margin Loss: %f', loss - l2dist)
                         lr /= 2
                         halving += 1
                     lr *= 2
@@ -261,12 +273,17 @@ def generate(self, x, **kwargs):
                     while loss <= prev_loss and doubling < self.max_doubling:
                         prev_loss = loss
                         lr *= 2
+                        logger.debug('Apply gradient with learning rate %f (doubling=%i)' % (lr, doubling))
                         doubling += 1
                         new_adv_image_tanh = adv_image_tanh + lr * perturbation_tanh
                         new_adv_image = self._tanh_to_original(new_adv_image_tanh, clip_min, clip_max)
-                        _, l2dist, loss = self._loss(image, new_adv_image, target, c)
+                        _, l2dist, loss = self._loss(image, new_adv_image, target, c)
+                        logger.debug('New Total Loss: %f', loss)
+                        logger.debug('New L2Dist: %f', l2dist)
+                        logger.debug('New Margin Loss: %f', loss - l2dist)
                         lr /= 2

+                    logger.debug('Finally apply gradient with learning rate %f', lr)
                     # apply the optimal learning rate that was found and update the loss:
                     adv_image_tanh = adv_image_tanh + lr * perturbation_tanh
                     adv_image = self._tanh_to_original(adv_image_tanh, clip_min, clip_max)
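
Taken together, the two loops above implement a halving/doubling line search along the fixed gradient direction: the learning rate is halved until a step decreases the loss, then doubled while the loss keeps improving, and the last successful rate is applied. A standalone restatement of the idea (simplified: the margin condition from the halving loop is omitted; all names are illustrative):

    def search_learning_rate(loss_fn, adv_tanh, direction, lr, max_halving, max_doubling):
        # Phase 1: halve lr until a step actually decreases the loss (or budget runs out).
        prev_loss = loss_fn(adv_tanh)
        loss = prev_loss
        halving = 0
        while loss >= prev_loss and halving < max_halving:
            loss = loss_fn(adv_tanh + lr * direction)
            lr /= 2
            halving += 1
        lr *= 2  # undo the last halving

        # Phase 2: double lr while steps keep decreasing the loss.
        doubling = 0
        while loss <= prev_loss and doubling < max_doubling:
            prev_loss = loss
            lr *= 2
            doubling += 1
            loss = loss_fn(adv_tanh + lr * direction)
        lr /= 2  # undo the last doubling

        return lr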
@@ -275,7 +292,9 @@ def generate(self, x, **kwargs):

                     # Update depending on attack success:
                     if attack_success:
+                        logger.debug('Margin Loss <= 0 --> Attack Success!')
                         if l2dist < best_l2dist:
+                            logger.debug('New best L2Dist: %f (previous=%f)' % (l2dist, best_l2dist))
                             best_l2dist = l2dist
                             best_adv_image = adv_image

@@ -301,7 +320,7 @@ def generate(self, x, **kwargs):
         else:
             preds = np.argmax(self.classifier.predict(x), axis=1)
             rate = np.sum(adv_preds != preds) / x_adv.shape[0]
-        logger.info('Success rate of C&W attack: %.2f%%', rate)
+        logger.info('Success rate of C&W attack: %.2f%%', 100 * rate)

         return x_adv

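The new debug messages only appear when the logger's level is set to DEBUG; with the standard library this can be done as follows (the 'art' logger name is an assumption about how the module's logger is namespaced):

    import logging

    logging.basicConfig(level=logging.DEBUG)          # show everything, including the new messages
    logging.getLogger('art').setLevel(logging.DEBUG)  # or scope it to the package logger only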