Skip to content

Commit af085aa

Browse files
MARIA NICOLAE (GitHub Enterprise)
authored and committed
Merge pull request #99 from MATHSINN/fix-cw
Fix C&W attack
2 parents 95067ea + 2920cf5 commit af085aa

File tree

1 file changed

+57
-20
lines changed

1 file changed

+57
-20
lines changed

art/attacks/carlini.py

Lines changed: 57 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414

1515
class CarliniL2Method(Attack):
1616
"""
17-
The L_2 optimized attack of Carlini and Wagner (2016). This attack is the most efficient and should be used as the
18-
primary attack to evaluate potential defences (wrt the L_0 and L_inf attacks). This implementation is inspired by
19-
the one in Cleverhans, which reproduces the authors' original code (https://github.com/carlini/nn_robust_attacks).
20-
Paper link: https://arxiv.org/pdf/1608.04644.pdf
17+
The L_2 optimized attack of Carlini and Wagner (2016). This attack is among the most effective and should be used
18+
among the primary attacks to evaluate potential defences. A major difference wrt to the original implementation
19+
(https://github.com/carlini/nn_robust_attacks) is that we use line search in the optimization of the attack
20+
objective. Paper link: https://arxiv.org/pdf/1608.04644.pdf
2121
"""
2222
attack_params = Attack.attack_params + ['confidence', 'targeted', 'learning_rate', 'max_iter',
2323
'binary_search_steps', 'initial_const', 'max_halving', 'max_doubling']
@@ -193,7 +193,7 @@ def generate(self, x, **kwargs):
193193
:return: An array holding the adversarial examples.
194194
:rtype: `np.ndarray`
195195
"""
196-
x_adv = x.copy().astype(NUMPY_DTYPE)
196+
x_adv = x.astype(NUMPY_DTYPE)
197197
(clip_min, clip_max) = self.classifier.clip_values
198198

199199
# Parse and save attack-specific parameters
@@ -209,7 +209,8 @@ def generate(self, x, **kwargs):
209209
if y is None:
210210
y = get_labels_np_array(self.classifier.predict(x, logits=False))
211211

212-
for j, (ex, target) in enumerate(zip(x_adv, y)):
212+
for j, (ex, target) in enumerate(zip(x_adv, y)):
213+
logger.debug('Processing sample %i out of %i', j, x_adv.shape[0])
213214
image = ex.copy()
214215

215216
# The optimization is performed in tanh space to keep the
@@ -223,62 +224,98 @@ def generate(self, x, **kwargs):
223224

224225
# Initialize placeholders for best l2 distance and attack found so far
225226
best_l2dist = sys.float_info.max
226-
best_adv_image = image
227-
lr = self.learning_rate
227+
best_adv_image = image
228228

229-
for _ in range(self.binary_search_steps):
229+
for bss in range(self.binary_search_steps):
230+
lr = self.learning_rate
231+
logger.debug('Binary search step %i out of %i (c==%f)', bss, self.binary_search_steps, c)
230232

231233
# Initialize perturbation in tanh space:
232234
adv_image = image
233235
adv_image_tanh = image_tanh
234236
z, l2dist, loss = self._loss(image, adv_image, target, c)
235237
attack_success = (loss - l2dist <= 0)
238+
overall_attack_success = attack_success
236239

237-
for it in range(self.max_iter):
240+
for it in range(self.max_iter):
241+
logger.debug('Iteration step %i out of %i', it, self.max_iter)
242+
logger.debug('Total Loss: %f', loss)
243+
logger.debug('L2Dist: %f', l2dist)
244+
logger.debug('Margin Loss: %f', loss-l2dist)
245+
238246
if attack_success:
239-
break
247+
logger.debug('Margin Loss <= 0 --> Attack Success!')
248+
if l2dist < best_l2dist:
249+
logger.debug('New best L2Dist: %f (previous=%f)', l2dist, best_l2dist)
250+
best_l2dist = l2dist
251+
best_adv_image = adv_image
240252

241253
# compute gradient:
254+
logger.debug('Compute loss gradient')
242255
perturbation_tanh = -self._loss_gradient(z, target, image, adv_image, adv_image_tanh,
243256
c, clip_min, clip_max)
244257

245258
# perform line search to optimize perturbation
246259
# first, halve the learning rate until perturbation actually decreases the loss:
247260
prev_loss = loss
261+
best_loss = loss
262+
best_lr = 0
263+
248264
halving = 0
249-
while loss >= prev_loss and loss - l2dist > 0 and halving < self.max_halving:
265+
while loss >= prev_loss and halving < self.max_halving:
266+
logger.debug('Apply gradient with learning rate %f (halving=%i)', lr, halving)
250267
new_adv_image_tanh = adv_image_tanh + lr * perturbation_tanh
251268
new_adv_image = self._tanh_to_original(new_adv_image_tanh, clip_min, clip_max)
252-
_, l2dist, loss = self._loss(image, new_adv_image, target, c)
269+
_, l2dist, loss = self._loss(image, new_adv_image, target, c)
270+
logger.debug('New Total Loss: %f', loss)
271+
logger.debug('New L2Dist: %f', l2dist)
272+
logger.debug('New Margin Loss: %f', loss-l2dist)
273+
if loss < best_loss:
274+
best_loss = loss
275+
best_lr = lr
253276
lr /= 2
254277
halving += 1
255278
lr *= 2
256279

257280
# if no halving was actually required, double the learning rate as long as this
258281
# decreases the loss:
259-
if halving == 1:
282+
if halving == 1 and loss <= prev_loss:
260283
doubling = 0
261284
while loss <= prev_loss and doubling < self.max_doubling:
262285
prev_loss = loss
263286
lr *= 2
287+
logger.debug('Apply gradient with learning rate %f (doubling=%i)', lr, doubling)
264288
doubling += 1
265289
new_adv_image_tanh = adv_image_tanh + lr * perturbation_tanh
266290
new_adv_image = self._tanh_to_original(new_adv_image_tanh, clip_min, clip_max)
267-
_, l2dist, loss = self._loss(image, new_adv_image, target, c)
291+
_, l2dist, loss = self._loss(image, new_adv_image, target, c)
292+
logger.debug('New Total Loss: %f', loss)
293+
logger.debug('New L2Dist: %f', l2dist)
294+
logger.debug('New Margin Loss: %f', loss-l2dist)
295+
if loss < best_loss:
296+
best_loss = loss
297+
best_lr = lr
268298
lr /= 2
269299

270-
# apply the optimal learning rate that was found and update the loss:
271-
adv_image_tanh = adv_image_tanh + lr * perturbation_tanh
272-
adv_image = self._tanh_to_original(adv_image_tanh, clip_min, clip_max)
300+
if best_lr >0:
301+
logger.debug('Finally apply gradient with learning rate %f', best_lr)
302+
# apply the optimal learning rate that was found and update the loss:
303+
adv_image_tanh = adv_image_tanh + best_lr * perturbation_tanh
304+
adv_image = self._tanh_to_original(adv_image_tanh, clip_min, clip_max)
305+
273306
z, l2dist, loss = self._loss(image, adv_image, target, c)
274307
attack_success = (loss - l2dist <= 0)
308+
overall_attack_success = overall_attack_success or attack_success
275309

276310
# Update depending on attack success:
277311
if attack_success:
312+
logger.debug('Margin Loss <= 0 --> Attack Success!')
278313
if l2dist < best_l2dist:
314+
logger.debug('New best L2Dist: %f (previous=%f)', l2dist, best_l2dist)
279315
best_l2dist = l2dist
280316
best_adv_image = adv_image
281-
317+
318+
if overall_attack_success:
282319
c_double = False
283320
c = (c_lower_bound + c) / 2
284321
else:
@@ -301,7 +338,7 @@ def generate(self, x, **kwargs):
301338
else:
302339
preds = np.argmax(self.classifier.predict(x), axis=1)
303340
rate = np.sum(adv_preds != preds) / x_adv.shape[0]
304-
logger.info('Success rate of C&W attack: %.2f%%', rate)
341+
logger.info('Success rate of C&W attack: %.2f%%', 100*rate)
305342

306343
return x_adv
307344

0 commit comments

Comments (0)