@@ -42,7 +42,6 @@ def __init__(self, clip_values, model, loss, optimizer, input_shape, nb_classes,
         super(PyTorchClassifier, self).__init__(clip_values=clip_values, channel_index=channel_index, defences=defences,
                                                 preprocessing=preprocessing)

-        # self._nb_classes = list(model.modules())[-1 if use_logits else -2].out_features
         self._nb_classes = nb_classes
         self._input_shape = input_shape
         self._model = PyTorchClassifier.ModelWrapper(model)
@@ -57,8 +56,8 @@ def __init__(self, clip_values, model, loss, optimizer, input_shape, nb_classes,

         # Use GPU if possible
         import torch
-        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        self._model.to(device)
+        self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self._model.to(self._device)

     def predict(self, x, logits=False):
         """
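The change above is the heart of the patch: the device is resolved once in the constructor and stored as `self._device`, so every later tensor conversion can target the same device instead of re-deriving it per call. As a standalone sketch of the idiom (the wrapper class below is illustrative, not part of the patch):

    import torch
    import torch.nn as nn

    class DeviceAwareWrapper:
        # Hypothetical minimal wrapper: resolve the device once at
        # construction time, move the model there, keep the handle around.
        def __init__(self, model: nn.Module):
            self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
            self._model = model.to(self._device)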
@@ -85,13 +84,14 @@ def predict(self, x, logits=False):
         # if not logits:
         #     exp = np.exp(preds - np.max(preds, axis=1, keepdims=True))
         #     preds = exp / np.sum(exp, axis=1, keepdims=True)
-        model_outputs = self._model(torch.from_numpy(x_).float())
+
+        model_outputs = self._model(torch.from_numpy(x_).to(self._device).float())
         (logit_output, output) = (model_outputs[-2], model_outputs[-1])

         if logits:
-            preds = logit_output.detach().numpy()
+            preds = logit_output.detach().cpu().numpy()
         else:
-            preds = output.detach().numpy()
+            preds = output.detach().cpu().numpy()

         return preds

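The `.cpu()` calls added in this hunk are required, not cosmetic: `Tensor.numpy()` only works on CPU tensors, so once inputs are sent to a CUDA device, outputs must be detached from the autograd graph and copied back before NumPy conversion. The full round trip, sketched as an illustrative helper (assuming `model` already lives on `device`):

    import torch

    def predict_numpy(model, x_np, device):
        # NumPy -> device tensor -> forward pass -> detach -> CPU -> NumPy.
        x = torch.from_numpy(x_np).to(device).float()
        return model(x).detach().cpu().numpy()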
@@ -130,11 +130,11 @@ def fit(self, x, y, batch_size=128, nb_epochs=10):
             # Train for one epoch
             for m in range(num_batch):
                 if m < num_batch - 1:
-                    i_batch = torch.from_numpy(x_[ind[m * batch_size:(m + 1) * batch_size]])
-                    o_batch = torch.from_numpy(y_[ind[m * batch_size:(m + 1) * batch_size]])
+                    i_batch = torch.from_numpy(x_[ind[m * batch_size:(m + 1) * batch_size]]).to(self._device)
+                    o_batch = torch.from_numpy(y_[ind[m * batch_size:(m + 1) * batch_size]]).to(self._device)
                 else:
-                    i_batch = torch.from_numpy(x_[ind[m * batch_size:]])
-                    o_batch = torch.from_numpy(y_[ind[m * batch_size:]])
+                    i_batch = torch.from_numpy(x_[ind[m * batch_size:]]).to(self._device)
+                    o_batch = torch.from_numpy(y_[ind[m * batch_size:]]).to(self._device)

                 # Cast to float
                 i_batch = i_batch.float()
@@ -170,7 +170,7 @@ def class_gradient(self, x, label=None, logits=False):
             raise ValueError('Label %s is out of range.' % label)

         # Convert the inputs to Tensors
-        x_ = torch.from_numpy(self._apply_processing(x))
+        x_ = torch.from_numpy(self._apply_processing(x)).to(self._device)
         x_ = x_.float()
         x_.requires_grad = True

@@ -191,8 +191,8 @@ def class_gradient(self, x, label=None, logits=False):
         # Compute the gradient
         if label is not None:
             self._model.zero_grad()
-            torch.autograd.backward(preds[:, label], torch.FloatTensor([1] * len(preds[:, 0])), retain_graph=True)
-            grds = x_.grad.numpy().copy()
+            torch.autograd.backward(preds[:, label], torch.Tensor([1.] * len(preds[:, 0])), retain_graph=True)
+            grds = x_.grad.cpu().numpy().copy()
             x_.grad.data.zero_()

             grds = np.expand_dims(self._apply_processing_gradient(grds), axis=1)
@@ -201,8 +201,8 @@ def class_gradient(self, x, label=None, logits=False):
             grds = []
             self._model.zero_grad()
             for i in range(self.nb_classes):
-                torch.autograd.backward(preds[:, i], torch.FloatTensor([1] * len(preds[:, 0])), retain_graph=True)
-                grds.append(x_.grad.numpy().copy())
+                torch.autograd.backward(preds[:, i], torch.Tensor([1.] * len(preds[:, 0])), retain_graph=True)
+                grds.append(x_.grad.cpu().numpy().copy())
                 x_.grad.data.zero_()

             grds = np.swapaxes(np.array(grds), 0, 1)
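Both class-gradient hunks drive torch.autograd.backward with an explicit ones vector as the grad_tensors argument, which back-propagates the class score preds[:, i] for every batch element in one call; retain_graph=True keeps the graph alive so the loop can run backward again for the next class. One hedge: torch.Tensor([1.] * n) is allocated on the CPU, so on a CUDA device the ones vector may also need to live on self._device. A sketch of the loop with the device made explicit (names are illustrative, not the classifier's API):

    import torch

    def per_class_input_gradients(model, x, nb_classes, device):
        # x: leaf tensor on `device` with requires_grad=True.
        preds = model(x)
        ones = torch.ones(len(preds), device=device)  # one weight per batch entry
        grads = []
        for i in range(nb_classes):
            torch.autograd.backward(preds[:, i], ones, retain_graph=True)
            grads.append(x.grad.detach().cpu().numpy().copy())
            x.grad.data.zero_()  # reset before accumulating the next class
        return grads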
@@ -225,12 +225,12 @@ def loss_gradient(self, x, y):
         import torch

         # Convert the inputs to Tensors
-        inputs_t = torch.from_numpy(self._apply_processing(x))
+        inputs_t = torch.from_numpy(self._apply_processing(x)).to(self._device)
         inputs_t = inputs_t.float()
         inputs_t.requires_grad = True

         # Convert the labels to Tensors
-        labels_t = torch.from_numpy(np.argmax(y, axis=1))
+        labels_t = torch.from_numpy(np.argmax(y, axis=1)).to(self._device)

         # Compute the gradient and return
         model_outputs = self._model(inputs_t)
@@ -242,7 +242,7 @@ def loss_gradient(self, x, y):

         # Compute gradients
         loss.backward()
-        grds = inputs_t.grad.numpy().copy()
+        grds = inputs_t.grad.cpu().numpy().copy()
         grds = self._apply_processing_gradient(grds)
         assert grds.shape == x.shape

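The loss_gradient changes follow the same choreography: inputs and labels move to self._device, the backward pass runs there, and the input gradient comes back through .cpu() before NumPy conversion. Condensed into an illustrative free function (the loss and model here are placeholders for the classifier's own):

    import numpy as np
    import torch
    import torch.nn as nn

    def loss_gradient_numpy(model, x_np, y_np, device):
        x = torch.from_numpy(x_np).to(device).float()
        x.requires_grad = True
        # Labels arrive one-hot; cross-entropy wants class indices.
        y = torch.from_numpy(np.argmax(y_np, axis=1)).to(device)
        loss = nn.CrossEntropyLoss()(model(x), y)
        loss.backward()
        return x.grad.detach().cpu().numpy().copy()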
@@ -286,7 +286,7 @@ def get_activations(self, x, layer):
         self._model.train(False)

         # Run prediction
-        model_outputs = self._model(torch.from_numpy(x).float())[:-1]
+        model_outputs = self._model(torch.from_numpy(x).to(self._device).float())[:-1]

         if isinstance(layer, six.string_types):
             if layer not in self._layer_names:
@@ -299,7 +299,7 @@ def get_activations(self, x, layer):
         else:
             raise TypeError("Layer must be of type str or int")

-        return model_outputs[layer_index].detach().numpy()
+        return model_outputs[layer_index].detach().cpu().numpy()

     # def _forward_at(self, inputs, layer):
     #     """