Trusted-AI
diff --git a/‎art/attacks/carlini.py‎
Lines changed: 11 additions & 7 deletions b/‎art/attacks/carlini.py‎
Lines changed: 11 additions & 7 deletions
diff --git a/‎art/attacks/carlini_unittest.py‎
Lines changed: 149 additions & 8 deletions b/‎art/attacks/carlini_unittest.py‎
Lines changed: 149 additions & 8 deletions
diff --git a/‎art/attacks/newtonfool.py‎
Lines changed: 4 additions & 0 deletions b/‎art/attacks/newtonfool.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎art/attacks/newtonfool_unittest.py‎
Lines changed: 59 additions & 2 deletions b/‎art/attacks/newtonfool_unittest.py‎
Lines changed: 59 additions & 2 deletions
@@ -12,7 +12,7 @@
 class CarliniL2Method(Attack):
     """
     The L_2 optimized attack of Carlini and Wagner (2016). This attack is the most efficient and should be used as the
-    primary attack to evaluate potential defenses (wrt the L_0 and L_inf attacks). This implementation is inspired by
+    primary attack to evaluate potential defences (wrt the L_0 and L_inf attacks). This implementation is inspired by
     the one in Cleverhans, which reproduces the authors' original code (https://github.com/carlini/nn_robust_attacks).
     Paper link: https://arxiv.org/pdf/1608.04644.pdf
     """
@@ -160,7 +160,7 @@ def generate(self, x, **kwargs):
 
         # No labels provided, use model prediction as correct class
         if y is None:
-            y = np.argmax(self.classifier.predict(inputs=x, logits=False), axis=1)
+            y = np.argmax(self.classifier.predict(x, logits=False), axis=1)
             y = to_categorical(y, self.classifier.nb_classes)
 
         # Images to be attacked:
@@ -240,7 +240,11 @@ def generate(self, x, **kwargs):
                 # Abort binary search if c exceeds upper bound:
                 if c > self._c_upper_bound:
                     break
-            
+
+            # Attack failed: transform best_adv_image from tanh space back to the original input space
+            if (best_adv_image == ex).all():
+                best_adv_image = (np.tanh(best_adv_image) / self._tanh_smoother + 1) / 2
+
             x_adv[j] = best_adv_image
 
         return x_adv
@@ -270,10 +274,10 @@ def set_params(self, **kwargs):
         # Save attack-specific parameters
         super(CarliniL2Method, self).set_params(**kwargs)
 
-        if type(self.binary_search_steps) is not int or self.binary_search_steps <= 0:
-            raise ValueError("The number of binary search steps must be a positive integer.")
+        if type(self.binary_search_steps) is not int or self.binary_search_steps < 0:
+            raise ValueError("The number of binary search steps must be a non-negative integer.")
 
-        if type(self.max_iter) is not int or self.max_iter <= 0:
-            raise ValueError("The number of iterations must be a positive integer.")
+        if type(self.max_iter) is not int or self.max_iter < 0:
+            raise ValueError("The number of iterations must be a non-negative integer.")
 
         return True
@@ -7,17 +7,84 @@
 import keras.backend as k
 from keras.models import Sequential
 from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
 
 from art.attacks.carlini import CarliniL2Method
 from art.classifiers.tensorflow import TFClassifier
 from art.classifiers.keras import KerasClassifier
+from art.classifiers.pytorch import PyTorchClassifier
 from art.utils import load_mnist, random_targets
 
 
+class Model(nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+        self.conv = nn.Conv2d(1, 16, 5)
+        self.pool = nn.MaxPool2d(2, 2)
+        self.fc = nn.Linear(2304, 10)
+
+    def forward(self, x):
+        x = self.pool(F.relu(self.conv(x)))
+        x = x.view(-1, 2304)
+        logit_output = self.fc(x)
+        output = F.softmax(logit_output, dim=1)
+
+        return logit_output, output
+
+
 class TestCarliniL2(unittest.TestCase):
     """
     A unittest class for testing the Carlini2 attack.
     """
+    def test_failure_attack(self):
+        """
+        Test the corner case where the attack fails: the output must stay within bounds and match the input.
+        :return:
+        """
+        # Build a TFClassifier
+        # Define input and output placeholders
+        self._input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
+        self._output_ph = tf.placeholder(tf.int32, shape=[None, 10])
+
+        # Define the tensorflow graph
+        conv = tf.layers.conv2d(self._input_ph, 4, 5, activation=tf.nn.relu)
+        conv = tf.layers.max_pooling2d(conv, 2, 2)
+        fc = tf.contrib.layers.flatten(conv)
+
+        # Logits layer
+        self._logits = tf.layers.dense(fc, 10)
+
+        # Train operator
+        self._loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=self._logits, onehot_labels=self._output_ph))
+        optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
+        self._train = optimizer.minimize(self._loss)
+
+        # Tensorflow session and initialization
+        self._sess = tf.Session()
+        self._sess.run(tf.global_variables_initializer())
+
+        # Get MNIST
+        batch_size, nb_train, nb_test = 100, 1000, 10
+        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
+        x_train, y_train = x_train[:nb_train], y_train[:nb_train]
+        x_test, y_test = x_test[:nb_test], y_test[:nb_test]
+
+        # Train the classifier
+        tfc = TFClassifier((0, 1), self._input_ph, self._logits, self._output_ph,
+                           self._train, self._loss, None, self._sess)
+        tfc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)
+
+        # Failure attack
+        cl2m = CarliniL2Method(classifier=tfc, targeted=True, max_iter=0, binary_search_steps=0,
+                               learning_rate=2e-2, initial_const=3, decay=1e-2)
+        params = {'y': random_targets(y_test, tfc.nb_classes)}
+        x_test_adv = cl2m.generate(x_test, **params)
+        self.assertTrue((x_test_adv <= 1.0001 ).all())
+        self.assertTrue((x_test_adv >= -0.0001 ).all())
+        np.testing.assert_almost_equal(x_test, x_test_adv, 3)
+
     def test_tfclassifier(self):
         """
         First test with the TFClassifier.
@@ -46,7 +113,7 @@ def test_tfclassifier(self):
         self._sess.run(tf.global_variables_initializer())
 
         # Get MNIST
-        batch_size, nb_train, nb_test = 100, 1000, 10
+        batch_size, nb_train, nb_test = 100, 500, 5
         (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
         x_train, y_train = x_train[:nb_train], y_train[:nb_train]
         x_test, y_test = x_test[:nb_test], y_test[:nb_test]
@@ -57,31 +124,38 @@ def test_tfclassifier(self):
         tfc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)
 
         # First attack
-        cl2m = CarliniL2Method(classifier=tfc, targeted=True, max_iter=100, binary_search_steps=10,
+        cl2m = CarliniL2Method(classifier=tfc, targeted=True, max_iter=10, binary_search_steps=10,
                                learning_rate=2e-2, initial_const=3, decay=1e-2)
         params = {'y': random_targets(y_test, tfc.nb_classes)}
         x_test_adv = cl2m.generate(x_test, **params)
         self.assertFalse((x_test == x_test_adv).all())
+        #print(x_test_adv)
+        self.assertTrue((x_test_adv <= 1.0001).all())
+        self.assertTrue((x_test_adv >= -0.0001).all())
         target = np.argmax(params['y'], axis=1)
         y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
         self.assertTrue((target == y_pred_adv).all())
 
         # Second attack
-        cl2m = CarliniL2Method(classifier=tfc, targeted=False, max_iter=100, binary_search_steps=10,
+        cl2m = CarliniL2Method(classifier=tfc, targeted=False, max_iter=10, binary_search_steps=10,
                                learning_rate=2e-2, initial_const=3, decay=1e-2)
         params = {'y': random_targets(y_test, tfc.nb_classes)}
         x_test_adv = cl2m.generate(x_test, **params)
         self.assertFalse((x_test == x_test_adv).all())
+        self.assertTrue((x_test_adv <= 1.0001).all())
+        self.assertTrue((x_test_adv >= -0.0001).all())
         target = np.argmax(params['y'], axis=1)
         y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
         self.assertTrue((target != y_pred_adv).all())
 
         # Third attack
-        cl2m = CarliniL2Method(classifier=tfc, targeted=False, max_iter=100, binary_search_steps=10,
+        cl2m = CarliniL2Method(classifier=tfc, targeted=False, max_iter=10, binary_search_steps=10,
                                learning_rate=2e-2, initial_const=3, decay=1e-2)
         params = {}
         x_test_adv = cl2m.generate(x_test, **params)
         self.assertFalse((x_test == x_test_adv).all())
+        self.assertTrue((x_test_adv <= 1.0001).all())
+        self.assertTrue((x_test_adv >= -0.0001).all())
         y_pred = np.argmax(tfc.predict(x_test), axis=1)
         y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
         self.assertTrue((y_pred != y_pred_adv).all())
@@ -96,7 +170,7 @@ def test_krclassifier(self):
         k.set_session(session)
 
         # Get MNIST
-        batch_size, nb_train, nb_test = 100, 1000, 10
+        batch_size, nb_train, nb_test = 100, 500, 5
         (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
         x_train, y_train = x_train[:nb_train], y_train[:nb_train]
         x_test, y_test = x_test[:nb_test], y_test[:nb_test]
@@ -116,35 +190,102 @@ def test_krclassifier(self):
         krc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)
 
         # First attack
-        cl2m = CarliniL2Method(classifier=krc, targeted=True, max_iter=100, binary_search_steps=10,
+        cl2m = CarliniL2Method(classifier=krc, targeted=True, max_iter=10, binary_search_steps=10,
                                learning_rate=2e-2, initial_const=3, decay=1e-2)
         params = {'y': random_targets(y_test, krc.nb_classes)}
         x_test_adv = cl2m.generate(x_test, **params)
         self.assertFalse((x_test == x_test_adv).all())
+        self.assertTrue((x_test_adv <= 1.0001).all())
+        self.assertTrue((x_test_adv >= -0.0001).all())
         target = np.argmax(params['y'], axis=1)
         y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
         self.assertTrue((target == y_pred_adv).any())
 
         # Second attack
-        cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=100, binary_search_steps=10,
+        cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=10, binary_search_steps=10,
                                learning_rate=2e-2, initial_const=3, decay=1e-2)
         params = {'y': random_targets(y_test, krc.nb_classes)}
         x_test_adv = cl2m.generate(x_test, **params)
         self.assertFalse((x_test == x_test_adv).all())
+        self.assertTrue((x_test_adv <= 1.0001).all())
+        self.assertTrue((x_test_adv >= -0.0001).all())
         target = np.argmax(params['y'], axis=1)
         y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
         self.assertTrue((target != y_pred_adv).all())
 
         # Third attack
-        cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=100, binary_search_steps=10,
+        cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=10, binary_search_steps=10,
                                learning_rate=2e-2, initial_const=3, decay=1e-2)
         params = {}
         x_test_adv = cl2m.generate(x_test, **params)
         self.assertFalse((x_test == x_test_adv).all())
+        self.assertTrue((x_test_adv <= 1.0001).all())
+        self.assertTrue((x_test_adv >= -0.0001).all())
         y_pred = np.argmax(krc.predict(x_test), axis=1)
         y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
         self.assertTrue((y_pred != y_pred_adv).any())
 
+    def test_ptclassifier(self):
+        """
+        Third test with the PyTorchClassifier.
+        :return:
+        """
+        # Get MNIST
+        batch_size, nb_train, nb_test = 100, 1000, 10
+        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
+        x_train, y_train = x_train[:nb_train], np.argmax(y_train[:nb_train], axis=1)
+        x_test, y_test = x_test[:nb_test], y_test[:nb_test]
+        x_train = np.swapaxes(x_train, 1, 3)
+        x_test = np.swapaxes(x_test, 1, 3)
+
+        # Create simple CNN
+        # Define the network
+        model = Model()
+
+        # Define a loss function and optimizer
+        loss_fn = nn.CrossEntropyLoss()
+        optimizer = optim.Adam(model.parameters(), lr=0.01)
+
+        # Get classifier
+        ptc = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 28, 28), (10,))
+        ptc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=1)
+
+        # First attack
+        cl2m = CarliniL2Method(classifier=ptc, targeted=True, max_iter=100, binary_search_steps=10,
+                               learning_rate=2e-2, initial_const=3, decay=1e-2)
+        params = {'y': random_targets(y_test, ptc.nb_classes)}
+        x_test_adv = cl2m.generate(x_test, **params)
+        self.assertFalse((x_test == x_test_adv).all())
+        self.assertTrue((x_test_adv <= 1.0001).all())
+        self.assertTrue((x_test_adv >= -0.0001).all())
+        target = np.argmax(params['y'], axis=1)
+        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
+        self.assertTrue((target == y_pred_adv).any())
+
+        # Second attack
+        cl2m = CarliniL2Method(classifier=ptc, targeted=False, max_iter=100, binary_search_steps=10,
+                               learning_rate=2e-2, initial_const=3, decay=1e-2)
+        params = {'y': random_targets(y_test, ptc.nb_classes)}
+        x_test_adv = cl2m.generate(x_test, **params)
+        self.assertFalse((x_test == x_test_adv).all())
+        self.assertTrue((x_test_adv <= 1.0001).all())
+        self.assertTrue((x_test_adv >= -0.0001).all())
+        target = np.argmax(params['y'], axis=1)
+        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
+        self.assertTrue((target != y_pred_adv).all())
+
+        # Third attack
+        cl2m = CarliniL2Method(classifier=ptc, targeted=False, max_iter=100, binary_search_steps=10,
+                               learning_rate=2e-2, initial_const=3, decay=1e-2)
+        params = {}
+        x_test_adv = cl2m.generate(x_test, **params)
+        self.assertFalse((x_test == x_test_adv).all())
+        self.assertTrue((x_test_adv <= 1.0001).all())
+        self.assertTrue((x_test_adv >= -0.0001).all())
+        y_pred = np.argmax(ptc.predict(x_test), axis=1)
+        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
+        self.assertTrue((y_pred != y_pred_adv).any())
+
 
 if __name__ == '__main__':
     unittest.main()
@@ -42,6 +42,7 @@ def generate(self, x, **kwargs):
         x_adv = x.copy()
 
         # Initialize variables
+        clip_min, clip_max = self.classifier.clip_values
         y_pred = self.classifier.predict(x, logits=False)
         pred_class = np.argmax(y_pred, axis=1)
 
@@ -68,6 +69,9 @@ def generate(self, x, **kwargs):
                 # Update xi and pertubation
                 ex += di
 
+            # Clip the adversarial example to the classifier's clip_values range
+            x_adv[j] = np.clip(ex, clip_min, clip_max)
+
         return x_adv
 
     def set_params(self, **kwargs):
 
@@ -6,13 +6,34 @@
 from keras.models import Sequential
 from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
 import tensorflow as tf
+import numpy as np
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
 
 from art.attacks.newtonfool import NewtonFool
 from art.classifiers.tensorflow import TFClassifier
 from art.classifiers.keras import KerasClassifier
+from art.classifiers.pytorch import PyTorchClassifier
 from art.utils import load_mnist
 
 
+class Model(nn.Module):
+    def __init__(self):
+        super(Model, self).__init__()
+        self.conv = nn.Conv2d(1, 16, 5)
+        self.pool = nn.MaxPool2d(2, 2)
+        self.fc = nn.Linear(2304, 10)
+
+    def forward(self, x):
+        x = self.pool(F.relu(self.conv(x)))
+        x = x.view(-1, 2304)
+        logit_output = self.fc(x)
+        output = F.softmax(logit_output, dim=1)
+
+        return logit_output, output
+
+
 class TestNewtonFool(unittest.TestCase):
     """
     A unittest class for testing the NewtonFool attack.
@@ -106,8 +127,44 @@ def test_krclassifier(self):
         y_pred_adv_max = y_pred_adv[y_pred_bool]
         self.assertTrue((y_pred_max >= y_pred_adv_max).all())
 
+    def test_ptclassifier(self):
+        """
+        Third test with the PyTorchClassifier.
+        :return:
+        """
+        # Get MNIST
+        batch_size, nb_train, nb_test = 100, 1000, 10
+        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
+        x_train, y_train = x_train[:nb_train], np.argmax(y_train[:nb_train], axis=1)
+        x_test, y_test = x_test[:nb_test], np.argmax(y_test[:nb_test], axis=1)
+        x_train = np.swapaxes(x_train, 1, 3)
+        x_test = np.swapaxes(x_test, 1, 3)
+
+        # Create simple CNN
+        # Define the network
+        model = Model()
 
-if __name__ == '__main__':
-    unittest.main()
+        # Define a loss function and optimizer
+        loss_fn = nn.CrossEntropyLoss()
+        optimizer = optim.Adam(model.parameters(), lr=0.01)
 
+        # Get classifier
+        ptc = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 28, 28), (10,))
+        ptc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=1)
+
+        # Attack
+        nf = NewtonFool(ptc)
+        nf.set_params(max_iter=5)
+        x_test_adv = nf.generate(x_test)
+        self.assertFalse((x_test == x_test_adv).all())
+
+        y_pred = ptc.predict(x_test)
+        y_pred_adv = ptc.predict(x_test_adv)
+        y_pred_bool = y_pred.max(axis=1, keepdims=1) == y_pred
+        y_pred_max = y_pred.max(axis=1)
+        y_pred_adv_max = y_pred_adv[y_pred_bool]
+        self.assertTrue((y_pred_max >= y_pred_adv_max).all())
 
+
+if __name__ == '__main__':
+    unittest.main()