Skip to content

Commit 3db3535

Browse files
minhitbk and GitHub Enterprise
authored and committed
Merge pull request #50 from M-N-Tran/master
Implement NewtonFool attack
2 parents da0cd8f + 6729e77 commit 3db3535

File tree

2 files changed

+194
-0
lines changed

2 files changed

+194
-0
lines changed

src/attacks/newtonfool.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
from __future__ import absolute_import, division, print_function
2+
3+
from keras import backend as k
4+
from keras.utils.generic_utils import Progbar
5+
6+
import numpy as np
7+
import tensorflow as tf
8+
9+
from src.attacks.attack import Attack, class_derivative
10+
11+
12+
class NewtonFool(Attack):
    """
    Implementation of the attack from Uyeong Jang et al. (2017).
    Paper link: http://doi.acm.org/10.1145/3134600.3134635

    For every input the attack repeatedly steps against the gradient of the
    score of the originally predicted class, which lowers the model's
    confidence in that class.
    """
    attack_params = ["max_iter", "eta", "verbose"]

    def __init__(self, classifier, sess, max_iter=100, eta=0.01, verbose=1):
        """
        Create a NewtonFool attack instance.

        :param classifier: An object of classifier.
        :param sess: The tf session to run graphs in.
        :param max_iter: (integer) The maximum number of iterations.
        :param eta: (float) The eta coefficient.
        :param verbose: (optional) Verbosity forwarded to the progress bar.
        :raises ValueError: if `max_iter` or `eta` fail validation.
        """
        super(NewtonFool, self).__init__(classifier, sess)
        params = {"max_iter": max_iter, "eta": eta, "verbose": verbose}
        self.set_params(**params)

    def generate(self, x_val, **kwargs):
        """
        Generate adversarial samples and return them in a Numpy array.

        :param x_val: (required) A Numpy array with the original inputs.
        :param kwargs: Optional attack parameters (see `set_params`) applied
                       before the attack runs.
        :return: A Numpy array holding the adversarial examples.
        :raises ValueError: if any supplied attack parameter is invalid.
        """
        # Called as a plain statement: wrapping it in `assert` would drop the
        # call (and therefore ignore kwargs) when Python runs with -O.
        self.set_params(**kwargs)

        x_adv = x_val.copy()
        nb_samples = len(x_val)
        if nb_samples == 0:
            # Nothing to attack; skip building the graph altogether.
            return x_adv

        model = self.classifier.model
        dims = list(x_val.shape)
        dims[0] = None  # Leave the batch dimension unspecified.
        nb_classes = model.output_shape[1]
        xi_op = tf.placeholder(dtype=tf.float32, shape=dims)
        output = model(xi_op)
        grads_graph = class_derivative(output, xi_op, nb_classes)

        # Progress bar
        progress_bar = Progbar(target=nb_samples, verbose=self.verbose)

        # Class predicted for each original input; its score is attacked.
        y_pred = model.predict(x_val)
        pred_class = np.argmax(y_pred, axis=1)

        # Main algorithm for each example
        for j, x in enumerate(x_adv):
            # Add a leading batch axis. For multi-dimensional inputs this is
            # a view onto x_adv[j], so the in-place updates below land in
            # the array that is returned.
            xi = x[None, ...]
            norm_x0 = np.linalg.norm(np.reshape(x, [-1]))
            target = pred_class[j]

            # Main loop of the algorithm
            for _ in range(self.max_iter):
                # Compute score
                score = model.predict(xi)[0][target]

                # Compute the gradients and norm
                grads = self.sess.run(
                    grads_graph, feed_dict={xi_op: xi})[target][0]
                norm_grad = np.linalg.norm(np.reshape(grads, [-1]))

                if norm_grad == 0:
                    # A zero gradient gives no direction to move in and
                    # would otherwise cause a division by zero; further
                    # iterations would see the same input, so stop here.
                    break

                # Theta
                theta = self._compute_theta(norm_x0, score, norm_grad,
                                            nb_classes)

                # Perturbation
                di = self._compute_pert(theta, grads, norm_grad)

                # Update xi
                xi += di

            # Store the adversarial example (a no-op when xi is a view).
            x_adv[j] = xi[0]

            # `current` is the number of finished samples, hence j + 1, so
            # the bar reaches its target after the last sample.
            progress_bar.update(current=j + 1, values=[(
                "perturbation",
                np.linalg.norm((x_adv[j] - x_val[j]).flatten()))])

        return x_adv

    def set_params(self, **kwargs):
        """Take in a dictionary of parameters and applies attack-specific
        checks before saving them as attributes.

        Attack-specific parameters:
        :param max_iter: (integer) The maximum number of iterations.
        :param eta: (float) The eta coefficient.
        :param verbose: (optional) Verbosity forwarded to the progress bar.
        :return: True when all parameters are valid.
        :raises ValueError: if `max_iter` is not a positive integer or
                            `eta` is not a positive float.
        """
        # Save attack-specific parameters
        super(NewtonFool, self).set_params(**kwargs)

        # isinstance() also admits NumPy scalar types; bool is excluded
        # explicitly because it is a subclass of int.
        if (isinstance(self.max_iter, bool)
                or not isinstance(self.max_iter, (int, np.integer))
                or self.max_iter <= 0):
            raise ValueError("The number of iterations must be a "
                             "positive integer.")

        if (not isinstance(self.eta, (float, np.floating))
                or self.eta <= 0):
            raise ValueError("The eta coefficient must be a positive float.")

        return True

    def _compute_theta(self, norm_x0, score, norm_grad, nb_classes):
        """
        Function to compute the theta at each step.

        :param norm_x0: norm of x0
        :param score: softmax value at the attacked class.
        :param norm_grad: norm of gradient values at the attacked class.
        :param nb_classes: number of classes.
        :return: theta value, the smaller of `eta * norm_x0 * norm_grad`
                 and `score - 1 / nb_classes`.
        """
        step_bound = self.eta * norm_x0 * norm_grad
        score_margin = score - 1.0 / nb_classes

        return min(step_bound, score_margin)

    def _compute_pert(self, theta, grads, norm_grad):
        """
        Function to compute the perturbation at each step.

        :param theta: theta value at the current step.
        :param grads: gradient values at the attacked class.
        :param norm_grad: norm of gradient values at the attacked class.
        :return: perturbation `-theta * grads / norm_grad ** 2`, or an
                 all-zero array when the gradient norm is zero.
        """
        # Square as a Python float so a tiny low-precision norm does not
        # underflow to zero when squared.
        denom = float(norm_grad) ** 2
        if denom == 0.0:
            # No usable direction: return a null step instead of NaN/inf.
            return np.zeros_like(grads)

        return (-theta * grads) / denom
137+
138+

src/attacks/newtonfool_unittest.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
from __future__ import absolute_import, division, print_function
2+
3+
import keras.backend as k
4+
import tensorflow as tf
5+
import unittest
6+
import numpy as np
7+
8+
from src.attacks.newtonfool import NewtonFool
9+
from src.classifiers.cnn import CNN
10+
from src.utils import load_mnist, get_labels_np_array, get_label_conf
11+
12+
13+
class TestNewtonFool(unittest.TestCase):
    """End-to-end check of the NewtonFool attack on a small MNIST subset."""

    def test_mnist(self):
        """
        Train a CNN for one epoch on 1000 MNIST samples, attack 11 test
        samples and check that the attack changes the inputs, does not
        raise the score of the originally predicted class, and changes the
        measured accuracy.
        """
        session = tf.Session()
        k.set_session(session)

        comp_params = {"loss": 'categorical_crossentropy',
                       "optimizer": 'adam',
                       "metrics": ['accuracy']}

        # get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 11
        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
        X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
        X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
        im_shape = X_train[0].shape

        # get classifier
        classifier = CNN(im_shape, act="relu")
        classifier.compile(comp_params)
        classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size,
                       verbose=0)

        # Attack
        nf = NewtonFool(classifier, sess=session)
        nf.set_params(max_iter=20)
        x_test_adv = nf.generate(X_test)
        self.assertFalse((X_test == x_test_adv).all())

        y_pred = classifier.predict(X_test)
        y_pred_adv = classifier.predict(x_test_adv)

        # Select exactly one entry per row (the first arg-max). A boolean
        # mask built from `y_pred == max` would select several entries for
        # rows with tied maxima and misalign the two vectors compared below.
        rows = np.arange(len(y_pred))
        pred_class = np.argmax(y_pred, axis=1)
        y_pred_max = y_pred[rows, pred_class]
        y_pred_adv_max = y_pred_adv[rows, pred_class]
        self.assertTrue((y_pred_max >= y_pred_adv_max).all())

        scores1 = classifier.evaluate(X_test, Y_test)
        print("\nAccuracy on test set: %.2f%%" % (scores1[1] * 100))
        scores2 = classifier.evaluate(x_test_adv, Y_test)
        print('\nAccuracy on adversarial examples: %.2f%%' % (scores2[1] * 100))
        # assertNotEqual reports both values when the check fails.
        self.assertNotEqual(scores1[1], scores2[1])
53+
54+
55+
if __name__ == '__main__':
    # Run every test case in this module when it is executed as a script.
    unittest.main()

0 commit comments

Comments
 (0)