
Commit 1a1d70b

Merge branch 'dev' of github.ibm.com:nemesis/nemesis into dev
2 parents e769459 + 8e26dfa commit 1a1d70b

File tree

15 files changed: +546, -194 lines changed


art/classifiers/classifier.py

Lines changed: 5 additions & 3 deletions
@@ -119,9 +119,11 @@ def class_gradient(self, x, label=None, logits=False):
 
         :param x: Sample input with shape as expected by the model.
         :type x: `np.ndarray`
-        :param label: Index of a specific per-class derivative. If `None`, then gradients for all
-                      classes will be computed.
-        :type label: `int`
+        :param label: Index of a specific per-class derivative. If an integer is provided, the gradient of that class
+                      output is computed for all samples. If multiple values are provided, the first dimension should
+                      match the batch size of `x`, and each value will be used as target for its corresponding sample in
+                      `x`. If `None`, then gradients for all classes will be computed for each sample.
+        :type label: `int` or `list`
         :param logits: `True` if the prediction should be done at the logits layer.
         :type logits: `bool`
         :return: Array of gradients of input features w.r.t. each class in the form
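
For reference, a minimal usage sketch of the updated `label` argument. The toy model, the `(0, 1)` clip values, the import path and the batch size are illustrative assumptions mirroring the MNIST unit tests below, not part of this commit:

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten
from art.classifiers import KerasClassifier  # import path assumed from this repository's layout

# Hypothetical MNIST-shaped model; any ART classifier exposes the same class_gradient interface.
model = Sequential([Flatten(input_shape=(28, 28, 1)), Dense(10, activation='softmax')])
model.compile(loss='categorical_crossentropy', optimizer='sgd')
classifier = KerasClassifier((0, 1), model)

x = np.random.rand(16, 28, 28, 1).astype(np.float32)

# label=None: gradients w.r.t. every class -> shape (16, 10, 28, 28, 1)
grads_all = classifier.class_gradient(x)

# label=5 (int): gradient of class 5 for all samples -> shape (16, 1, 28, 28, 1)
grads_five = classifier.class_gradient(x, label=5)

# label as a 1-D array (one target class per sample) -> shape (16, 1, 28, 28, 1)
targets = np.random.randint(0, 10, size=16)
grads_per_sample = classifier.class_gradient(x, label=targets)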

art/classifiers/keras.py

Lines changed: 61 additions & 27 deletions
@@ -126,39 +126,59 @@ def class_gradient(self, x, label=None, logits=False):
 
         :param x: Sample input with shape as expected by the model.
         :type x: `np.ndarray`
-        :param label: Index of a specific per-class derivative. If `None`, then gradients for all
-                      classes will be computed.
-        :type label: `int`
+        :param label: Index of a specific per-class derivative. If an integer is provided, the gradient of that class
+                      output is computed for all samples. If multiple values are provided, the first dimension should
+                      match the batch size of `x`, and each value will be used as target for its corresponding sample in
+                      `x`. If `None`, then gradients for all classes will be computed for each sample.
+        :type label: `int` or `list`
         :param logits: `True` if the prediction should be done at the logits layer.
         :type logits: `bool`
         :return: Array of gradients of input features w.r.t. each class in the form
                  `(batch_size, nb_classes, input_shape)` when computing for all classes, otherwise shape becomes
                  `(batch_size, 1, input_shape)` when `label` parameter is specified.
         :rtype: `np.ndarray`
         """
-        if label is not None and label not in range(self._nb_classes):
-            raise ValueError('Label %s is out of range.' % label)
+        # Check value of label for computing gradients
+        if not (label is None or (isinstance(label, (int, np.integer)) and label in range(self.nb_classes))
+                or (type(label) is np.ndarray and len(label.shape) == 1 and (label < self.nb_classes).all()
+                    and label.shape[0] == x.shape[0])):
+            raise ValueError('Label %s is out of range.' % str(label))
 
         self._init_class_grads(label=label, logits=logits)
 
         x_ = self._apply_processing(x)
 
-        if label is not None:
+        if label is None:
+            # Compute the gradients w.r.t. all classes
+            if logits:
+                grads = np.swapaxes(np.array(self._class_grads_logits([x_])), 0, 1)
+            else:
+                grads = np.swapaxes(np.array(self._class_grads([x_])), 0, 1)
+
+            grads = self._apply_processing_gradient(grads)
+
+        elif isinstance(label, (int, np.integer)):
+            # Compute the gradients only w.r.t. the provided label
             if logits:
                 grads = np.swapaxes(np.array(self._class_grads_logits_idx[label]([x_])), 0, 1)
             else:
                 grads = np.swapaxes(np.array(self._class_grads_idx[label]([x_])), 0, 1)
 
             grads = self._apply_processing_gradient(grads)
             assert grads.shape == (x_.shape[0], 1) + self.input_shape
+
         else:
+            # For each sample, compute the gradients w.r.t. the indicated target class (possibly distinct)
+            unique_label = list(np.unique(label))
             if logits:
-                grads = np.swapaxes(np.array(self._class_grads_logits([x_])), 0, 1)
+                grads = np.array([self._class_grads_logits_idx[l]([x_]) for l in unique_label])
             else:
-                grads = np.swapaxes(np.array(self._class_grads([x_])), 0, 1)
+                grads = np.array([self._class_grads_idx[l]([x_]) for l in unique_label])
+            grads = np.swapaxes(np.squeeze(grads, axis=1), 0, 1)
+            lst = [unique_label.index(i) for i in label]
+            grads = np.expand_dims(grads[np.arange(len(grads)), lst], axis=1)
 
             grads = self._apply_processing_gradient(grads)
-            assert grads.shape == (x_.shape[0], self.nb_classes) + self.input_shape
 
         return grads
 
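
The new per-sample branch above evaluates the cached gradient function only once per unique label and then routes each result back to its sample. A standalone NumPy sketch of that gather step (array names and shapes are illustrative, not taken from the diff):

import numpy as np

label = np.array([2, 0, 2, 1])                 # per-sample target classes
unique_label = list(np.unique(label))          # [0, 1, 2]: only three gradient evaluations needed

# Stand-in for the per-unique-label gradients, shape (n_unique, batch, *input_shape)
grads = np.random.rand(len(unique_label), len(label), 28, 28, 1)

grads = np.swapaxes(grads, 0, 1)               # -> (batch, n_unique, 28, 28, 1)
lst = [unique_label.index(i) for i in label]   # position of each sample's label among the uniques
grads = np.expand_dims(grads[np.arange(len(grads)), lst], axis=1)
print(grads.shape)                             # (4, 1, 28, 28, 1)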

@@ -278,35 +298,49 @@ def _init_class_grads(self, label=None, logits=False):
         import keras.backend as k
         k.set_learning_phase(0)
 
-        if label is not None:
-            logger.debug('Computing class gradients for class %i.', label)
-            if logits:
-                if not hasattr(self, '_class_grads_logits_idx'):
-                    self._class_grads_logits_idx = [None for _ in range(self.nb_classes)]
-
-                if self._class_grads_logits_idx[label] is None:
-                    class_grads_logits = [k.gradients(self._preds_op[:, label], self._input)[0]]
-                    self._class_grads_logits_idx[label] = k.function([self._input], class_grads_logits)
-            else:
-                if not hasattr(self, '_class_grads_idx'):
-                    self._class_grads_idx = [None for _ in range(self.nb_classes)]
-
-                if self._class_grads_idx[label] is None:
-                    class_grads = [k.gradients(k.softmax(self._preds_op)[:, label], self._input)[0]]
-                    self._class_grads_idx[label] = k.function([self._input], class_grads)
+        if len(self._output.shape) == 2:
+            nb_outputs = self._output.shape[1]
         else:
+            raise ValueError('Unexpected output shape for classification in Keras model.')
+
+        if label is None:
             logger.debug('Computing class gradients for all %i classes.', self.nb_classes)
             if logits:
                 if not hasattr(self, '_class_grads_logits'):
                     class_grads_logits = [k.gradients(self._preds_op[:, i], self._input)[0]
-                                          for i in range(self.nb_classes)]
+                                          for i in range(nb_outputs)]
                     self._class_grads_logits = k.function([self._input], class_grads_logits)
             else:
                 if not hasattr(self, '_class_grads'):
                     class_grads = [k.gradients(k.softmax(self._preds_op)[:, i], self._input)[0]
-                                   for i in range(self.nb_classes)]
+                                   for i in range(nb_outputs)]
                     self._class_grads = k.function([self._input], class_grads)
 
+        else:
+            if type(label) is int:
+                unique_labels = [label]
+                logger.debug('Computing class gradients for class %i.', label)
+            else:
+                unique_labels = np.unique(label)
+                logger.debug('Computing class gradients for classes %s.', str(unique_labels))
+
+            if logits:
+                if not hasattr(self, '_class_grads_logits_idx'):
+                    self._class_grads_logits_idx = [None for _ in range(nb_outputs)]
+
+                for l in unique_labels:
+                    if self._class_grads_logits_idx[l] is None:
+                        class_grads_logits = [k.gradients(self._preds_op[:, l], self._input)[0]]
+                        self._class_grads_logits_idx[l] = k.function([self._input], class_grads_logits)
+            else:
+                if not hasattr(self, '_class_grads_idx'):
+                    self._class_grads_idx = [None for _ in range(nb_outputs)]
+
+                for l in unique_labels:
+                    if self._class_grads_idx[l] is None:
+                        class_grads = [k.gradients(k.softmax(self._preds_op)[:, l], self._input)[0]]
+                        self._class_grads_idx[l] = k.function([self._input], class_grads)
+
     def _get_layers(self):
         """
         Return the hidden layers in the model, if applicable.
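
_init_class_grads now builds one backend gradient function per requested class and caches it, so repeated calls with the same label reuse the compiled function. A simplified sketch of that caching pattern, assuming a graph-mode Keras model with a 2-D (batch, nb_classes) output; the diff itself works on the classifier's internal _preds_op and _input tensors rather than model.output / model.input:

import keras.backend as k

_grad_fn_cache = {}

def class_grad_fn(model, label):
    # Build (once) and cache a function returning d model.output[:, label] / d model.input.
    if label not in _grad_fn_cache:
        grad = k.gradients(model.output[:, label], model.input)[0]
        _grad_fn_cache[label] = k.function([model.input], [grad])
    return _grad_fn_cache[label]

# Usage on a batch x_:  grads = class_grad_fn(model, 3)([x_])[0]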

art/classifiers/keras_unittest.py

Lines changed: 33 additions & 0 deletions
@@ -139,6 +139,39 @@ def _test_shapes(self, custom_activation=False):
         loss_grads = classifier.loss_gradient(x_test[:11], y_test[:11])
         self.assertTrue(loss_grads.shape == x_test[:11].shape)
 
+    def test_class_gradient(self):
+        (_, _), (x_test, y_test) = self.mnist
+        classifier = KerasClassifier((0, 1), self.model_mnist)
+
+        # Test all gradients label
+        grads = classifier.class_gradient(x_test)
+
+        self.assertTrue(np.array(grads.shape == (NB_TEST, 10, 28, 28, 1)).all())
+        self.assertTrue(np.sum(grads) != 0)
+
+        # Test 1 gradient label = 5
+        grads = classifier.class_gradient(x_test, label=5)
+
+        self.assertTrue(np.array(grads.shape == (NB_TEST, 1, 28, 28, 1)).all())
+        self.assertTrue(np.sum(grads) != 0)
+
+        # Test a set of gradients label = array
+        label = np.random.randint(5, size=NB_TEST)
+        grads = classifier.class_gradient(x_test, label=label)
+
+        self.assertTrue(np.array(grads.shape == (NB_TEST, 1, 28, 28, 1)).all())
+        self.assertTrue(np.sum(grads) != 0)
+
+    def test_loss_gradient(self):
+        (_, _), (x_test, y_test) = self.mnist
+        classifier = KerasClassifier((0, 1), self.model_mnist)
+
+        # Test gradient
+        grads = classifier.loss_gradient(x_test, y_test)
+
+        self.assertTrue(np.array(grads.shape == (NB_TEST, 28, 28, 1)).all())
+        self.assertTrue(np.sum(grads) != 0)
+
     def test_functional_model(self):
         self._test_functional_model(custom_activation=True)
         self._test_functional_model(custom_activation=False)

art/classifiers/mxnet.py

Lines changed: 35 additions & 8 deletions
@@ -151,9 +151,11 @@ def class_gradient(self, x, label=None, logits=False):
 
         :param x: Sample input with shape as expected by the model.
         :type x: `np.ndarray`
-        :param label: Index of a specific per-class derivative. If `None`, then gradients for all
-                      classes will be computed.
-        :type label: `int`
+        :param label: Index of a specific per-class derivative. If an integer is provided, the gradient of that class
+                      output is computed for all samples. If multiple values are provided, the first dimension should
+                      match the batch size of `x`, and each value will be used as target for its corresponding sample in
+                      `x`. If `None`, then gradients for all classes will be computed for each sample.
+        :type label: `int` or `list`
         :param logits: `True` if the prediction should be done at the logits layer.
         :type logits: `bool`
         :return: Array of gradients of input features w.r.t. each class in the form
@@ -163,14 +165,31 @@ def class_gradient(self, x, label=None, logits=False):
         """
         from mxnet import autograd, nd
 
-        if label is not None and label not in range(self._nb_classes):
-            raise ValueError('Label %s is out of range.' % label)
+        # Check value of label for computing gradients
+        if not (label is None or (isinstance(label, (int, np.integer)) and label in range(self.nb_classes))
+                or (type(label) is np.ndarray and len(label.shape) == 1 and (label < self.nb_classes).all()
+                    and label.shape[0] == x.shape[0])):
+            raise ValueError('Label %s is out of range.' % str(label))
 
         x_ = self._apply_processing(x)
         x_ = nd.array(x_, ctx=self._ctx)
         x_.attach_grad()
 
-        if label is not None:
+        if label is None:
+            with autograd.record(train_mode=False):
+                if logits is True:
+                    preds = self._model(x_)
+                else:
+                    preds = self._model(x_).softmax()
+                class_slices = [preds[:, i] for i in range(self.nb_classes)]
+
+            grads = []
+            for slice_ in class_slices:
+                slice_.backward(retain_graph=True)
+                grad = x_.grad.asnumpy()
+                grads.append(grad)
+            grads = np.swapaxes(np.array(grads), 0, 1)
+        elif isinstance(label, (int, np.integer)):
             with autograd.record(train_mode=False):
                 if logits is True:
                     preds = self._model(x_)
@@ -181,19 +200,26 @@ def class_gradient(self, x, label=None, logits=False):
             class_slice.backward()
             grads = np.expand_dims(x_.grad.asnumpy(), axis=1)
         else:
+            unique_labels = list(np.unique(label))
+
             with autograd.record(train_mode=False):
                 if logits is True:
                     preds = self._model(x_)
                 else:
                     preds = self._model(x_).softmax()
-                class_slices = [preds[:, i] for i in range(self.nb_classes)]
+                class_slices = [preds[:, i] for i in unique_labels]
 
             grads = []
             for slice_ in class_slices:
                 slice_.backward(retain_graph=True)
                 grad = x_.grad.asnumpy()
                 grads.append(grad)
+
             grads = np.swapaxes(np.array(grads), 0, 1)
+            lst = [unique_labels.index(i) for i in label]
+            grads = grads[np.arange(len(grads)), lst]
+            grads = np.expand_dims(grads, axis=1)
+            grads = self._apply_processing_gradient(grads)
 
         grads = self._apply_processing_gradient(grads)
 
@@ -220,7 +246,8 @@ def loss_gradient(self, x, y):
         with autograd.record(train_mode=False):
             preds = self._model(x_)
             loss = loss(preds, y_)
-            loss.backward()
+
+        loss.backward()
         grads = x_.grad.asnumpy()
         grads = self._apply_processing_gradient(grads)
         assert grads.shape == x.shape
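
The MXNet path computes one backward pass per class slice, keeping the recorded graph alive with retain_graph=True. A standalone sketch with a toy Gluon network; the network, input shape and class count are illustrative assumptions, not taken from this commit:

import numpy as np
import mxnet as mx
from mxnet import autograd, nd

net = mx.gluon.nn.Dense(3)      # toy 3-class "model"
net.initialize()

x_ = nd.random.uniform(shape=(4, 5))
x_.attach_grad()

with autograd.record(train_mode=False):
    preds = net(x_).softmax()
    class_slices = [preds[:, i] for i in range(3)]

grads = []
for slice_ in class_slices:
    slice_.backward(retain_graph=True)   # keep the graph so the next class slice can also be differentiated
    grads.append(x_.grad.asnumpy())      # grad_req='write' overwrites x_.grad on each backward

grads = np.swapaxes(np.array(grads), 0, 1)
print(grads.shape)                       # (4, 3, 5): (batch, nb_classes, *input_shape)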

art/classifiers/mxnet_unittest.py

Lines changed: 7 additions & 0 deletions
@@ -81,6 +81,13 @@ def test_class_gradient(self):
         # Assert gradient computed for the same class on same input are equal
         self.assertAlmostEqual(np.sum(grads_all[:, 3] - grads), 0, places=6)
 
+        # Test a set of gradients label = array
+        labels = np.random.randint(5, size=NB_TEST)
+        grads = self.classifier.class_gradient(x_test, label=labels)
+
+        self.assertTrue(np.array(grads.shape == (NB_TEST, 1, 1, 28, 28)).all())
+        self.assertTrue(np.sum(grads) != 0)
+
     def test_loss_gradient(self):
         # Get MNIST
         (_, _), (x_test, y_test) = self.mnist
