Merge from dev

Irina Nicolae · Irina Nicolae · commit a43204906dd2 · 2018-05-10T14:54:39.000+01:00
diff --git a/art/attacks/universal_perturbation.py b/art/attacks/universal_perturbation.py
@@ -81,6 +81,7 @@ def generate(self, x, **kwargs):
         # Instantiate the middle attacker and get the predicted labels
         attacker = self._get_attack(self.attacker, self.attacker_params)
         pred_y = self.classifier.predict(x, logits=False)
+        pred_y_max = np.argmax(pred_y, axis=1)
 
         # Start to generate the adversarial examples
         nb_iter = 0
@@ -92,14 +93,14 @@ def generate(self, x, **kwargs):
             for j, ex in enumerate(x[rnd_idx]):
                 xi = ex[None, ...]
 
-                f_xi = self.classifier.predict(xi + v, logits=False)
+                f_xi = self.classifier.predict(xi + v, logits=True)
                 fk_i_hat = np.argmax(f_xi[0])
                 fk_hat = np.argmax(pred_y[rnd_idx][j])
 
                 if fk_i_hat == fk_hat:
                     # Compute adversarial perturbation
                     adv_xi = attacker.generate(xi + v)
-                    adv_f_xi = self.classifier.predict(adv_xi, logits=False)
+                    adv_f_xi = self.classifier.predict(adv_xi, logits=True)
                     adv_fk_i_hat = np.argmax(adv_f_xi[0])
 
                     # If the class has changed, update v
@@ -112,10 +113,8 @@ def generate(self, x, **kwargs):
 
             # Compute the error rate
             adv_x = x + v
-            adv_y = self.classifier.predict(adv_x, logits=False)
-            adv_y_max = np.argmax(adv_y, axis=1)
-            pred_y_max = np.argmax(pred_y, axis=1)
-            fooling_rate = np.sum(pred_y_max != adv_y_max) / float(nb_instances)
+            adv_y = np.argmax(self.classifier.predict(adv_x, logits=False))
+            fooling_rate = np.sum(pred_y_max != adv_y) / nb_instances
 
         self.fooling_rate = fooling_rate
         self.converged = (nb_iter < self.max_iter)
@@ -213,4 +212,3 @@ def _get_class(self, class_name):
         class_module = getattr(module, sub_mods[-1])
 
         return class_module
-
diff --git a/art/attacks/virtual_adversarial.py b/art/attacks/virtual_adversarial.py
@@ -56,17 +56,23 @@ def generate(self, x, **kwargs):
 
         for ind, val in enumerate(x_adv):
             d = np.random.randn(*dims)
-            e = np.random.randn(*dims)
+            
             for _ in range(self.max_iter):
-                d = self.finite_diff * self._normalize(d)
-                e = self.finite_diff * self._normalize(e)
-                preds_new = self.classifier.predict(np.stack((val + d, val + e)))
-
-                # Compute KL divergence between logits
+                d = self._normalize(d)
+                preds_new = self.classifier.predict((val + d)[None, ...], logits=False)
+                
                 from scipy.stats import entropy
                 kl_div1 = entropy(preds[ind], preds_new[0])
-                kl_div2 = entropy(preds[ind], preds_new[1])
-                d = (kl_div1 - kl_div2) / np.abs(d - e)
+                
+                # TODO remove for loop
+                d_new = d
+                for i in range(*dims):
+                    d[i] += self.finite_diff
+                    preds_new = self.classifier.predict((val + d)[None, ...], logits=False)
+                    kl_div2 = entropy(preds[ind], preds_new[0])                    
+                    d_new[i] = (kl_div2-kl_div1)/self.finite_diff
+                    d[i] -= self.finite_diff
+                d = d_new
 
             # Apply perturbation and clip
             val = np.clip(val + self.eps * self._normalize(d), clip_min, clip_max)
@@ -88,7 +94,6 @@ def _normalize(x):
         dims = x.shape
 
         x = x.flatten()
-        x /= np.max(np.abs(x)) + tol
         inverse = (np.sum(x**2) + np.sqrt(tol)) ** -.5
         x = x * inverse
         x = np.reshape(x, dims)
diff --git a/art/classifiers/classifier.py b/art/classifiers/classifier.py
@@ -3,8 +3,6 @@
 import abc
 import sys
 
-# TODO Add tests for defences on classifier
-
 # Ensure compatibility with Python 2 and 3 when using ABCMeta
 if sys.version_info >= (3, 4):
     ABC = abc.ABC
@@ -25,7 +23,6 @@ def __init__(self, clip_values, defences=None):
         :type clip_values: `tuple`
         """
         self._clip_values = clip_values
-        self._parse_defences(defences)
 
     def predict(self, inputs, logits=False):
         """
diff --git a/art/classifiers/keras.py b/art/classifiers/keras.py
@@ -20,8 +20,6 @@ def __init__(self, clip_values, model, use_logits=False, defences=None):
         :type model: `keras.models.Sequential`
         :param use_logits: True if the output of the model are the logits
         :type use_logits: `bool`
-        :param defences: Defences to be activated with the classifier.
-        :type defences: `str` or `list(str)`
         """
         import keras.backend as k
 
diff --git a/art/classifiers/tensorflow.py b/art/classifiers/tensorflow.py
@@ -38,7 +38,7 @@ def __init__(self, clip_values, input_ph, logits, output_ph=None, train=None, lo
         """
         import tensorflow as tf
 
-        super(TFClassifier, self).__init__(clip_values)
+        super(TFClassifier, self).__init__(clip_values, defences)
         self._nb_classes = int(logits.get_shape()[-1])
         self._input_shape = tuple(input_ph.get_shape()[1:])
         self._input_ph = input_ph
@@ -178,4 +178,3 @@ def loss_gradient(self, inputs, labels):
         grds = self._sess.run(self._loss_grads, feed_dict={self._input_ph: inputs, self._output_ph: labels})
 
         return grds
-
diff --git a/art/defences/preprocessor.py b/art/defences/preprocessor.py
@@ -30,45 +30,41 @@ def is_fitted(self):
         :return: `True` if the preprocessing model has been fitted (if this applies).
         :rtype: `bool`
         """
-        return self._input_shape
+        return self._is_fitted
 
     @abc.abstractmethod
     def __call__(self, x, y=None):
         """
         Perform data preprocessing and return preprocessed data as tuple.
 
-        :param x: (np.ndarray) Dataset to be preprocessed
-        :param y: (np.ndarray) Labels to be preprocessed
+        :param x: Dataset to be preprocessed.
+        :type x: `np.ndarray`
+        :param y: Labels to be preprocessed.
+        :type y: `np.ndarray`
         :return: Preprocessed data
         """
-        pass
+        raise NotImplementedError
 
     @abc.abstractmethod
     def fit(self, x, y=None, **kwargs):
         """
         Fit the parameters of the data preprocessor if it has any.
 
-        :param x: (np.ndarray) Training set to fit the preprocessor
-        :param y: (np.ndarray) Labels for the training set
-        :param kwargs: (dict) Other parameters
+        :param x: Training set to fit the preprocessor.
+        :type x: `np.ndarray`
+        :param y: Labels for the training set.
+        :type y: `np.ndarray`
+        :param kwargs: Other parameters.
+        :type kwargs: `dict`
         :return: None
         """
         self._is_fitted = True
 
-    def predict(self, x, y=None):
-        """
-        Perform data preprocessing and return preprocessed data as tuple.
-
-        :param x: (np.ndarray) Dataset to be preprocessed
-        :param y: (np.ndarray) Labels to be preprocessed
-        :return: Preprocessed data
-        """
-        return self.__call__(x, y)
-
     def set_params(self, **kwargs):
         """
         Take in a dictionary of parameters and apply checks before saving them as attributes.
-        :return: True when parsing was successful
+
+        :return: `True` when parsing was successful
         """
         for key, value in kwargs.items():
             if key in self.params:
diff --git a/art/detection/__init__.py b/art/detection/__init__.py
@@ -0,0 +1,4 @@
+"""
+Module providing methods for detecting adversarial samples under a common interface.
+"""
+from art.detection.detector import Detector
diff --git a/art/detection/detector.py b/art/detection/detector.py
@@ -0,0 +1,70 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import abc
+import sys
+
+
+# Ensure compatibility with Python 2 and 3 when using ABCMeta
+if sys.version_info >= (3, 4):
+    ABC = abc.ABC
+else:
+    ABC = abc.ABCMeta(str('ABC'), (), {})
+
+
+class Detector(ABC):
+    """
+    Base abstract class for all detection methods.
+    """
+    def __init__(self):
+        """
+        Create a detector.
+        """
+        self._is_fitted = False
+
+    @property
+    def is_fitted(self):
+        """
+        Return the state of the detector.
+
+        :return: `True` if the detection model has been fitted (if this applies).
+        :rtype: `bool`
+        """
+        return self._is_fitted
+
+    @abc.abstractmethod
+    def fit(self, x, y=None, **kwargs):
+        """
+        Fit the detector using training data (if this applies).
+
+        :param x: Training set to fit the detector.
+        :type x: `np.ndarray`
+        :param y: Labels for the training set.
+        :type y: `np.ndarray`
+        :param kwargs: Other parameters.
+        :type kwargs: `dict`
+        :return: None
+        """
+        self._is_fitted = True
+
+    @abc.abstractmethod
+    def __call__(self, x):
+        """
+        Perform detection of adversarial data and return a per-sample prediction.
+
+        :param x: Data sample on which to perform detection.
+        :type x: `np.ndarray`
+        :return: Per-sample prediction whether data is adversarial or not, where `0` means non-adversarial.
+                 Return variable has the same `batch_size` (first dimension) as `x`.
+        :rtype: `np.ndarray`
+        """
+        raise NotImplementedError
+
+    def set_params(self, **kwargs):
+        """
+        Take in a dictionary of parameters and apply checks before saving them as attributes.
+        :return: `True` when parsing was successful
+        """
+        for key, value in kwargs.items():
+            if key in self.params:
+                setattr(self, key, value)
+        return True
diff --git a/docs/index.rst b/docs/index.rst
@@ -49,6 +49,7 @@ The following defense methods are also supported:
    modules/attacks
    modules/classifiers
    modules/defences
+   modules/detection
    modules/metrics
    modules/utils
 
diff --git a/docs/modules/detection.rst b/docs/modules/detection.rst
@@ -0,0 +1,7 @@
+:mod:`art.detection`
+====================
+
+Base Class
+----------
+.. autoclass:: Detector
+   :members:

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +"""
 +Module providing methods for detecting adversarial samples under a common interface.
 +"""
 +from art.detection.detector import Detector