
Commit 3aba2c4

Irina Nicolae authored and committed
Merge from dev
2 parents 3db3535 + 47e042a commit 3aba2c4

12 files changed: 501 additions and 208 deletions


run_tests.sh

Lines changed: 2 additions & 1 deletion
@@ -1 +1,2 @@
-python -m unittest discover src/ -p '*_unittest.py'
+#!/usr/bin/env bash
+python -m unittest discover src/ -p '*_unittest.py'
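The shebang makes the script directly executable; the discovery it runs can also be reproduced programmatically, e.g. with this sketch (assuming, as the command itself does, that the tests live under src/ and are named *_unittest.py):

    # Programmatic equivalent of: python -m unittest discover src/ -p '*_unittest.py'
    import unittest

    suite = unittest.defaultTestLoader.discover('src/', pattern='*_unittest.py')
    unittest.TextTestRunner(verbosity=2).run(suite)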

src/attacks/attack.py

Lines changed: 2 additions & 2 deletions
@@ -39,12 +39,12 @@ def class_derivative(preds, x, num_labels=10):
 if sys.version_info >= (3, 4):
     ABC = abc.ABC
 else:
-    ABC = abc.ABCMeta('ABC', (), {})
+    ABC = abc.ABCMeta(str('ABC'), (), {})


 class Attack(ABC):
     """
-    Abstract base class for all attack classes. Adapted from cleverhans (https://github.com/openai/cleverhans).
+    Abstract base class for all attack classes.
     """
     attack_params = ['classifier', 'session']
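The subtle bit in this hunk is `str('ABC')`: on Python 2, `abc.ABCMeta` (like `type`) requires the class name to be a native `str`, and once `from __future__ import unicode_literals` is in effect (it is added to the other modules in this commit, and presumably to this one as well) the bare literal `'ABC'` becomes `unicode`. Wrapping it in `str()` converts it on Python 2 and is a no-op on Python 3. A minimal sketch of the pattern in isolation (`Base` and `run` are illustrative names):

    # Python 2/3 compatible abstract base class that stays valid under
    # `unicode_literals`: class names must be native str on Python 2.
    from __future__ import unicode_literals

    import abc
    import sys

    if sys.version_info >= (3, 4):
        ABC = abc.ABC                          # Python 3.4+ ships abc.ABC directly
    else:
        ABC = abc.ABCMeta(str('ABC'), (), {})  # str() turns the unicode literal into a native str

    class Base(ABC):
        @abc.abstractmethod
        def run(self):
            """Concrete subclasses must override this."""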

src/attacks/carlini.py

Lines changed: 208 additions & 42 deletions
Large diffs are not rendered by default.

src/attacks/carlini_unittest.py

Lines changed: 1 addition & 3 deletions
@@ -1,6 +1,4 @@
-from __future__ import absolute_import, division, print_function
-
-from config import config_dict
+from __future__ import absolute_import, division, print_function, unicode_literals

 import keras.backend as k
 import tensorflow as tf

src/attacks/fast_gradient.py

Lines changed: 45 additions & 15 deletions
@@ -1,8 +1,5 @@
-from __future__ import absolute_import, division, print_function
+from __future__ import absolute_import, division, print_function, unicode_literals

-from config import config_dict
-
-from cleverhans.attacks_tf import fgm
 from keras import backend as k
 import numpy as np
 import tensorflow as tf
@@ -13,25 +10,25 @@
 class FastGradientMethod(Attack):
     """
     This attack was originally implemented by Goodfellow et al. (2015) with the infinity norm (and is known as the "Fast
-    Gradient Sign Method"). This implementation is inspired by the one in Cleverhans
-    (https://github.com/tensorflow/cleverhans) which extends the attack to other norms, and is therefore called the Fast
+    Gradient Sign Method"). This implementation extends the attack to other norms, and is therefore called the Fast
     Gradient Method. Paper link: https://arxiv.org/abs/1412.6572
     """
-    attack_params = ['ord', 'y', 'y_val', 'clip_min', 'clip_max']
+    attack_params = ['ord', 'y', 'y_val', 'targeted', 'clip_min', 'clip_max']

-    def __init__(self, classifier, sess=None, ord=np.inf, y=None, clip_min=None, clip_max=None):
+    def __init__(self, classifier, sess=None, ord=np.inf, y=None, targeted=False, clip_min=None, clip_max=None):
         """Create a FastGradientMethod instance.
-        :param ord: (optional) Order of the norm (mimics Numpy). Possible values: np.inf, 1 or 2.
+        :param ord: (optional) Order of the norm. Possible values: np.inf, 1 or 2.
         :param y: (optional) A placeholder for the model labels. Only provide this parameter if you'd like to use true
             labels when crafting adversarial samples. Otherwise, model predictions are used as labels to avoid the
             "label leaking" effect (explained in this paper: https://arxiv.org/abs/1611.01236). Default is None.
             Labels should be one-hot-encoded.
+        :param targeted: (optional boolean) Should the attack target one specific class.
         :param clip_min: (optional float) Minimum input component value
         :param clip_max: (optional float) Maximum input component value
         """
         super(FastGradientMethod, self).__init__(classifier, sess)

-        kwargs = {'ord': ord, 'clip_min': clip_min, 'clip_max': clip_max, 'y': y}
+        kwargs = {'ord': ord, 'targeted': targeted, 'clip_min': clip_min, 'clip_max': clip_max, 'y': y}
         self.set_params(**kwargs)

     def generate_graph(self, x, eps=0.3, **kwargs):
@@ -48,11 +45,43 @@ def generate_graph(self, x, eps=0.3, **kwargs):
         """
         self.set_params(**kwargs)

-        return fgm(x, self.classifier._get_predictions(x, log=False), y=self.y, eps=eps, ord=self.ord,
-                   clip_min=self.clip_min, clip_max=self.clip_max)
+        preds = self.classifier._get_predictions(x, log=False)
+
+        if not hasattr(self, 'y') or self.y is None:
+            # Use model predictions as correct outputs
+            preds_max = tf.reduce_max(preds, 1, keep_dims=True)
+            y = tf.to_float(tf.equal(preds, preds_max))
+            y = tf.stop_gradient(y)
+        else:
+            y = self.y
+        y = y / tf.reduce_sum(y, 1, keep_dims=True)
+
+        loss = tf.nn.softmax_cross_entropy_with_logits(logits=preds, labels=y)
+        if self.targeted:
+            loss = -loss
+        grad, = tf.gradients(loss, x)
+
+        # Apply norm bound
+        if self.ord == np.inf:
+            grad = tf.sign(grad)
+        elif self.ord == 1:
+            ind = list(range(1, len(x.get_shape())))
+            grad = grad / tf.reduce_sum(tf.abs(grad), reduction_indices=ind, keep_dims=True)
+        elif self.ord == 2:
+            ind = list(range(1, len(x.get_shape())))
+            grad = grad / tf.sqrt(tf.reduce_sum(tf.square(grad), reduction_indices=ind, keep_dims=True))
+
+        # Apply perturbation and clip
+        x_adv_op = x + eps * grad
+        if self.clip_min is not None and self.clip_max is not None:
+            x_adv_op = tf.clip_by_value(x_adv_op, self.clip_min, self.clip_max)
+
+        return x_adv_op

     def minimal_perturbations(self, x, x_val, eps_step=0.1, eps_max=1., **kwargs):
-        """Iteratively compute the minimal perturbation necessary to make the class prediction change.
+        """Iteratively compute the minimal perturbation necessary to make the class prediction change. Stop when the
+        first adversarial example is found.
+
         :param x: (required) A placeholder for the input.
         :param x_val: (required) A Numpy array with the original inputs.
         :param eps_step: (optional float) The increase in the perturbation for each iteration
@@ -67,14 +96,14 @@ def minimal_perturbations(self, x, x_val, eps_step=0.1, eps_max=1., **kwargs):
         eps = eps_step

         while len(curr_indexes) != 0 and eps <= eps_max:
-            # adversarial crafting
+            # Adversarial crafting
             adv_x_op = self.generate_graph(x, eps=eps, **kwargs)
             adv_y = tf.argmax(self.model(adv_x_op), 1)

             feed_dict = {x: x_val[curr_indexes], k.learning_phase(): 0}
             new_adv_x, new_y = self.sess.run([adv_x_op, adv_y], feed_dict=feed_dict)

-            # update
+            # Update
             adv_x[curr_indexes] = new_adv_x
             curr_indexes = np.where(y[curr_indexes] == new_y)[0]
@@ -93,6 +122,7 @@ def generate(self, x_val, **kwargs):
             Labels should be one-hot-encoded.
         :param clip_min: (optional float) Minimum input component value
         :param clip_max: (optional float) Maximum input component value
+        :return: A Numpy array holding the adversarial examples.
         """

         input_shape = list(x_val.shape)
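Taken together, the new `generate_graph` replaces the removed `cleverhans.attacks_tf.fgm` call with an in-place construction of the attack: compute the softmax cross-entropy loss between the logits and the (true or predicted) labels, negate it for targeted attacks, and move each input by `eps` along the resulting gradient, whose direction is `sign(grad)` for `ord=np.inf` and the gradient rescaled to unit L1 or L2 norm for `ord=1` or `ord=2`. A pure-NumPy sketch of just that norm bound, to make the three cases concrete (it mirrors the TF logic above and is not part of the repository's API):

    import numpy as np

    def bounded_direction(grad, ord=np.inf):
        """Normalize a batch of gradients the way generate_graph does."""
        if ord == np.inf:
            return np.sign(grad)                 # fixed step of eps per component
        axes = tuple(range(1, grad.ndim))        # reduce over all non-batch axes
        if ord == 1:
            return grad / np.sum(np.abs(grad), axis=axes, keepdims=True)
        if ord == 2:
            return grad / np.sqrt(np.sum(grad ** 2, axis=axes, keepdims=True))
        raise ValueError("ord must be np.inf, 1 or 2")

Driving the class then looks roughly like `FastGradientMethod(classifier, sess=sess, ord=np.inf, clip_min=0., clip_max=1.)` followed by `generate(x_test, eps=0.3)`, assuming the repository's classifier wrapper, an open TF session, and that `generate` forwards `eps` to `generate_graph`; `classifier` and `x_test` are placeholders here.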

src/attacks/fast_gradient_unittest.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-from __future__ import absolute_import, division, print_function
+from __future__ import absolute_import, division, print_function, unicode_literals

 import unittest