
Commit 86baa09

Merge pull request #1118 from TrojAISec/dev_1.7.0
Support for Binary Classification with PGD
2 parents 9104486 + c941a95 commit 86baa09
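
To set the context for the diffs below, here is a minimal usage sketch of what this change set is meant to enable: running PGD against a PyTorch model that has a single sigmoid output. The sketch is not part of the commit; the toy model, the data shapes, and the attack hyperparameters are illustrative assumptions, only the ART class and function names are real.

# Hypothetical sketch: PGD against a binary (single-sigmoid-output) PyTorch model.
# Model architecture, data, and hyperparameters are illustrative, not from the commit.
import numpy as np
import torch
from torch import nn

from art.attacks.evasion import ProjectedGradientDescent
from art.estimators.classification import PyTorchClassifier

# Toy binary classifier: 4 features -> 1 sigmoid probability
model = nn.Sequential(nn.Linear(4, 16), nn.ReLU(), nn.Linear(16, 1), nn.Sigmoid())
loss = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

classifier = PyTorchClassifier(
    model=model,
    loss=loss,
    optimizer=optimizer,
    input_shape=(4,),
    nb_classes=2,          # binary task; predictions come back as a single column
    clip_values=(0.0, 1.0),
)

x = np.random.rand(8, 4).astype(np.float32)
y = np.random.randint(0, 2, size=(8, 1)).astype(np.float32)  # labels as one 0/1 column

attack = ProjectedGradientDescent(estimator=classifier, eps=0.2, eps_step=0.05, max_iter=10)
x_adv = attack.generate(x=x, y=y)
print(x_adv.shape)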

16 files changed: +212 additions, −55 deletions

AUTHORS

Lines changed: 1 addition & 0 deletions
@@ -14,3 +14,4 @@
 - AGH University of Science and Technology
 - Rensselaer Polytechnic Institute (RPI)
 - IMT Atlantique
+- Troj.AI

art/attacks/evasion/fast_gradient.py

Lines changed: 3 additions & 1 deletion
@@ -231,7 +231,9 @@ def generate(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
             # Use model predictions as correct outputs
             logger.info("Using model predictions as correct labels for FGM.")
             y = get_labels_np_array(self.estimator.predict(x, batch_size=self.batch_size))  # type: ignore
-        y = y / np.sum(y, axis=1, keepdims=True)
+
+        if self.estimator.nb_classes > 2:
+            y = y / np.sum(y, axis=1, keepdims=True)
 
         # Return adversarial examples computed with minimal perturbation if option is active
         rate_best: Optional[float]

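The new guard matters because binary estimators hand back labels as a single 0/1 column rather than a one-hot matrix, and dividing such a column by its own row sum destroys the 0-label rows. A small numpy illustration (shapes and values are assumptions for illustration, not from the commit):

import numpy as np

# Multi-class case: one-hot rows of shape (n, nb_classes); row-normalization is harmless.
y_multi = np.array([[0, 1, 0], [1, 0, 0]], dtype=np.float32)
print(y_multi / np.sum(y_multi, axis=1, keepdims=True))  # rows unchanged

# Binary case: a single sigmoid-style column of 0/1 labels, shape (n, 1).
y_bin = np.array([[1.0], [0.0]], dtype=np.float32)
with np.errstate(divide="ignore", invalid="ignore"):
    print(y_bin / np.sum(y_bin, axis=1, keepdims=True))  # second row becomes nan (0/0)
# Hence the `if self.estimator.nb_classes > 2:` guard skips the division for binary labels.
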
art/attacks/inference/attribute_inference/meminf_based.py

Lines changed: 5 additions & 5 deletions
@@ -56,7 +56,7 @@ def __init__(
         Create an AttributeInferenceMembership attack instance.
 
         :param classifier: Target classifier.
-        :param membership_attack: The membership inference attack to use. Should be fit/callibrated in advance, and
+        :param membership_attack: The membership inference attack to use. Should be fit/calibrated in advance, and
                                   should support returning probabilities.
         :param attack_feature: The index of the feature to be attacked or a slice representing multiple indexes in
                                case of a one-hot encoded feature.
@@ -106,10 +106,10 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
 
                 predicted = self.membership_attack.infer(x_value, y, probabilities=True)
                 if first:
-                    probabilities = predicted[:, 1].reshape(-1, 1)
+                    probabilities = predicted
                     first = False
                 else:
-                    probabilities = np.hstack((probabilities, predicted[:, 1].reshape(-1, 1)))
+                    probabilities = np.hstack((probabilities, predicted))
 
             # needs to be of type float so we can later replace back the actual values
             value_indexes = np.argmax(probabilities, axis=1).astype(np.float32)
@@ -130,9 +130,9 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
 
                 predicted = self.membership_attack.infer(x_value, y, probabilities=True)
                 if first:
-                    probabilities = predicted[:, 1].reshape(-1, 1)
+                    probabilities = predicted
                 else:
-                    probabilities = np.hstack((probabilities, predicted[:, 1].reshape(-1, 1)))
+                    probabilities = np.hstack((probabilities, predicted))
                 first = False
             value_indexes = np.argmax(probabilities, axis=1).astype(np.float32)
             pred_values = np.zeros_like(probabilities)

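Because the membership attack now returns one probability column per call (see the black_box.py diff below), the attribute inference loop simply hstacks one column per candidate feature value and takes the argmax across candidates. A schematic numpy version of that selection step, with made-up probabilities and candidate values:

import numpy as np

# One membership-probability column per candidate value of the attacked feature,
# here three candidate values tried for two samples (illustrative numbers).
prob_value_a = np.array([[0.2], [0.9]])
prob_value_b = np.array([[0.7], [0.1]])
prob_value_c = np.array([[0.4], [0.3]])

probabilities = np.hstack((prob_value_a, prob_value_b, prob_value_c))  # shape (2, 3)
value_indexes = np.argmax(probabilities, axis=1)  # candidate with the highest membership score
values = np.array([1.0, 2.0, 3.0])                # hypothetical candidate feature values
print(values[value_indexes])                      # inferred feature value per sample -> [2. 1.]
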
art/attacks/inference/membership_inference/black_box.py

Lines changed: 5 additions & 7 deletions
@@ -292,11 +292,9 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
 
             if inferred is not None:
                 if not probabilities:
-                    inferred_return = inferred.reshape(-1).astype(np.int)
+                    inferred_return = np.round(inferred)
                 else:
-                    inferred = inferred.reshape(-1)
-                    prob_0 = np.ones_like(inferred) - inferred
-                    inferred_return = np.stack((prob_0, inferred), axis=1)
+                    inferred_return = inferred
             else:
                 raise ValueError("No data available.")
         elif not self.default_model:
@@ -305,13 +303,13 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
             if probabilities:
                 inferred_return = pred
             else:
-                inferred_return = np.array([np.argmax(arr) for arr in pred])
+                inferred_return = np.round(pred)
         else:
             pred = self.attack_model.predict_proba(np.c_[features, y])  # type: ignore
             if probabilities:
-                inferred_return = pred
+                inferred_return = pred[:, [1]]
             else:
-                inferred_return = np.array([np.argmax(arr) for arr in pred])
+                inferred_return = np.round(pred[:, [1]])
 
         return inferred_return

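The practical effect of this diff is a new output convention for the black-box membership attack: with probabilities=True it returns a single column holding the probability of membership instead of a stacked (prob_0, prob_1) pair, and without probabilities it rounds that column to hard 0/1 decisions. The two conventions select the same members, as this small check with made-up scores illustrates:

import numpy as np

p_member = np.array([[0.8], [0.3], [0.6]])  # new convention: one column, P(member)

# Old convention reconstructed for comparison: columns (P(non-member), P(member))
two_col = np.hstack((1.0 - p_member, p_member))

assert np.array_equal(np.round(p_member).ravel(), np.argmax(two_col, axis=1))
print(np.round(p_member).ravel())  # hard membership decisions: [1. 0. 1.]
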
art/estimators/classification/classifier.py

Lines changed: 2 additions & 0 deletions
@@ -105,6 +105,8 @@ def nb_classes(self) -> int:
 
         :return: Number of classes in the data.
         """
+        if self._nb_classes < 2:
+            raise ValueError("nb_classes must be greater than or equal to 2.")
         return self._nb_classes  # type: ignore
 

art/estimators/classification/keras.py

Lines changed: 3 additions & 0 deletions
@@ -178,6 +178,9 @@ def _initialize_params(
             self._output_layer = 0
 
         _, self._nb_classes = k.int_shape(self._output)
+        # Check for binary classification
+        if self._nb_classes == 1:
+            self._nb_classes = 2
         self._input_shape = k.int_shape(self._input)[1:]
         logger.debug(
             "Inferred %i classes and %s as input shape for Keras classifier.",

art/estimators/classification/pytorch.py

Lines changed: 31 additions & 15 deletions
@@ -40,7 +40,7 @@
 from art.utils import check_and_transform_label_format
 
 if TYPE_CHECKING:
-    # pylint: disable=C0412
+    # pylint: disable=C0412, C0302
     import torch
 
     from art.utils import CLIP_VALUES_TYPE, PREPROCESSING_TYPE
@@ -266,21 +266,26 @@ def reduce_labels(self, y: Union[np.ndarray, "torch.Tensor"]) -> Union[np.ndarray, "torch.Tensor"]:
         """
         Reduce labels from one-hot encoded to index labels.
         """
+        # pylint: disable=R0911
         import torch  # lgtm [py/repeated-import]
 
         # Check if the loss function requires as input index labels instead of one-hot-encoded labels
-        if self._reduce_labels and self._int_labels:
-            if isinstance(y, torch.Tensor):
-                return torch.argmax(y, dim=1)
-            return np.argmax(y, axis=1)
-
-        if self._reduce_labels:  # float labels
+        # Checking for exactly 2 classes to support binary classification
+        if self.nb_classes > 2:
+            if self._reduce_labels and self._int_labels:
+                if isinstance(y, torch.Tensor):
+                    return torch.argmax(y, dim=1)
+                return np.argmax(y, axis=1)
+            if self._reduce_labels:  # float labels
+                if isinstance(y, torch.Tensor):
+                    return torch.argmax(y, dim=1).type("torch.FloatTensor")
+                y_index = np.argmax(y, axis=1).astype(np.float32)
+                y_index = np.expand_dims(y_index, axis=1)
+                return y_index
+        else:
             if isinstance(y, torch.Tensor):
-                return torch.argmax(y, dim=1).type("torch.FloatTensor")
-            y_index = np.argmax(y, axis=1).astype(np.float32)
-            y_index = np.expand_dims(y_index, axis=1)
-            return y_index
-
+                return y.float()
+            return y.astype(np.float32)
         return y
 
     def predict(  # pylint: disable=W0221
@@ -302,8 +307,9 @@ def predict(  # pylint: disable=W0221
         # Apply preprocessing
         x_preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False)
 
+        results_list = []
+
         # Run prediction with batch processing
-        results = np.zeros((x_preprocessed.shape[0], self.nb_classes), dtype=np.float32)
         num_batch = int(np.ceil(len(x_preprocessed) / float(batch_size)))
         for m in range(num_batch):
             # Batch indexes
@@ -315,8 +321,13 @@ def predict(  # pylint: disable=W0221
             with torch.no_grad():
                 model_outputs = self._model(torch.from_numpy(x_preprocessed[begin:end]).to(self._device))
             output = model_outputs[-1]
-            results[begin:end] = output.detach().cpu().numpy()
+            output = output.detach().cpu().numpy().astype(np.float32)
+            if len(output.shape) == 1:
+                output = np.expand_dims(output.detach().cpu().numpy(), axis=1).astype(np.float32)
+
+            results_list.append(output)
 
+        results = np.vstack(results_list)
         # Apply postprocessing
         predictions = self._apply_postprocessing(preds=results, fit=False)
 
@@ -577,7 +588,12 @@ def hook(grad):
 
         self._model.zero_grad()
         if label is None:
-            for i in range(self.nb_classes):
+            if len(preds.shape) == 1 or preds.shape[1] == 1:
+                num_outputs = 1
+            else:
+                num_outputs = self.nb_classes
+
+            for i in range(num_outputs):
                 torch.autograd.backward(
                     preds[:, i],
                     torch.tensor([1.0] * len(preds[:, 0])).to(self._device),

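The reduce_labels change above keeps binary targets as a float column of shape (n, 1), which suits losses such as torch.nn.BCELoss, and only applies the one-hot-to-index reduction when there are more than two classes. The sketch below mirrors that branching in plain numpy for illustration; it is not the ART method itself:

import numpy as np

def reduce_labels_sketch(y: np.ndarray, nb_classes: int) -> np.ndarray:
    """Illustrative re-implementation of the label handling, not the ART method."""
    if nb_classes > 2:
        # multi-class: one-hot -> index labels, as most PyTorch losses expect
        return np.argmax(y, axis=1)
    # binary: keep the single 0/1 column, but as float for BCE-style losses
    return y.astype(np.float32)

y_multi = np.array([[0, 0, 1], [1, 0, 0]])
print(reduce_labels_sketch(y_multi, nb_classes=3))   # -> [2 0]

y_binary = np.array([[1], [0], [1]])
print(reduce_labels_sketch(y_binary, nb_classes=2))  # -> float column, shape (3, 1)
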
art/estimators/classification/tensorflow.py

Lines changed: 3 additions & 0 deletions
@@ -1037,6 +1037,9 @@ def class_gradient(  # pylint: disable=W0221
 
                 class_gradient = tape.gradient(prediction, x_input).numpy()
                 class_gradients.append(class_gradient)
+                # Break after 1 iteration for binary classification case
+                if len(predictions.shape) == 1 or predictions.shape[1] == 1:
+                    break
 
             gradients = np.swapaxes(np.array(class_gradients), 0, 1)

art/utils.py

Lines changed: 21 additions & 5 deletions
@@ -528,13 +528,18 @@ def check_and_transform_label_format(
         if len(labels.shape) == 2 and labels.shape[1] > 1:
             if not return_one_hot:
                 labels = np.argmax(labels, axis=1)
-        elif len(labels.shape) == 2 and labels.shape[1] == 1:
+        elif len(labels.shape) == 2 and labels.shape[1] == 1 and nb_classes is not None and nb_classes > 2:
             labels = np.squeeze(labels)
             if return_one_hot:
                 labels = to_categorical(labels, nb_classes)
+        elif len(labels.shape) == 2 and labels.shape[1] == 1 and nb_classes is not None and nb_classes == 2:
+            pass
         elif len(labels.shape) == 1:
             if return_one_hot:
-                labels = to_categorical(labels, nb_classes)
+                if nb_classes == 2:
+                    labels = np.expand_dims(labels, axis=1)
+                else:
+                    labels = to_categorical(labels, nb_classes)
         else:
             raise ValueError(
                 "Shape of labels not recognised."
@@ -616,7 +621,10 @@ def get_labels_np_array(preds: np.ndarray) -> np.ndarray:
     :param preds: Array of class confidences, nb of instances as first dimension.
     :return: Labels.
     """
-    preds_max = np.amax(preds, axis=1, keepdims=True)
+    if len(preds.shape) >= 2:
+        preds_max = np.amax(preds, axis=1, keepdims=True)
+    else:
+        preds_max = np.round(preds)
     y = preds == preds_max
     y = y.astype(np.uint8)
    return y

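With the new check_and_transform_label_format branches, asking for a one-hot encoding with nb_classes=2 now yields a single 0/1 column, while nb_classes greater than 2 still produces a full one-hot matrix, and an already-(n, 1) binary column passes through unchanged. A short usage sketch against the public art.utils helper, assuming it behaves as in the diff above (array values are illustrative):

import numpy as np

from art.utils import check_and_transform_label_format

y = np.array([0, 1, 1, 0])

# Multi-class request: classic one-hot encoding, shape (4, 3)
print(check_and_transform_label_format(y, nb_classes=3).shape)

# Binary request: labels become a single column, shape (4, 1), not a (4, 2) one-hot matrix
print(check_and_transform_label_format(y, nb_classes=2).shape)
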
@@ -642,11 +650,19 @@ def compute_success_array(
     :param batch_size: Batch size.
     :return: Percentage of successful adversarial samples.
     """
-    adv_preds = np.argmax(classifier.predict(x_adv, batch_size=batch_size), axis=1)
+    adv_preds = classifier.predict(x_adv, batch_size=batch_size)
+    if len(adv_preds.shape) >= 2:
+        adv_preds = np.argmax(adv_preds, axis=1)
+    else:
+        adv_preds = np.round(adv_preds)
     if targeted:
         attack_success = adv_preds == np.argmax(labels, axis=1)
     else:
-        preds = np.argmax(classifier.predict(x_clean, batch_size=batch_size), axis=1)
+        preds = classifier.predict(x_clean, batch_size=batch_size)
+        if len(preds.shape) >= 2:
+            preds = np.argmax(preds, axis=1)
+        else:
+            preds = np.round(preds)
         attack_success = adv_preds != preds
 
     return attack_success

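get_labels_np_array and compute_success_array above now share the same dispatch: predictions with a class dimension are reduced with a per-row argmax, while a flat probability vector is thresholded with np.round. The pattern in isolation, with made-up scores:

import numpy as np

# Multi-column scores -> winning class index per row
scores_multi = np.array([[0.1, 0.9], [0.8, 0.2]])
print(np.argmax(scores_multi, axis=1))  # -> [1 0]

# Flat vector of membership / class-1 probabilities -> threshold at 0.5
scores_binary = np.array([0.9, 0.2, 0.6])
print(np.round(scores_binary))          # -> [1. 0. 1.]
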
tests/attacks/inference/attribute_inference/test_meminf_based.py

Lines changed: 8 additions & 8 deletions
@@ -187,8 +187,8 @@ def transform_feature(x):
         inferred_train = attack.infer(x_train_for_attack, y_train_iris, values=values)
         inferred_test = attack.infer(x_test_for_attack, y_test_iris, values=values)
         # check accuracy
-        train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
-        test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
+        train_acc = np.sum(inferred_train == x_train_feature) / len(inferred_train)
+        test_acc = np.sum(inferred_test == x_test_feature) / len(inferred_test)
         assert 0.1 <= train_acc
         assert 0.1 <= test_acc
 
@@ -325,18 +325,18 @@ def transform_feature(x):
         attack_train_ratio = 0.5
         attack_train_size = int(len(x_train) * attack_train_ratio)
         attack_test_size = int(len(x_test) * attack_train_ratio)
-        # attack without callibration
+        # attack without calibration
         attack = AttributeInferenceMembership(classifier, meminf_attack, attack_feature=attack_feature)
         # infer attacked feature
         inferred_train = attack.infer(x_train_for_attack, y_train_iris, values=values)
         inferred_test = attack.infer(x_test_for_attack, y_test_iris, values=values)
         # check accuracy
-        train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
-        test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
+        train_acc = np.sum(inferred_train == x_train_feature) / len(inferred_train)
+        test_acc = np.sum(inferred_test == x_test_feature) / len(inferred_test)
         assert 0.5 <= train_acc
         assert 0.5 <= test_acc
 
-        # attack with callibration
+        # attack with calibration
         meminf_attack.calibrate_distance_threshold(
             x_train[:attack_train_size],
             y_train_iris[:attack_train_size],
@@ -349,8 +349,8 @@ def transform_feature(x):
         inferred_train = attack.infer(x_train_for_attack, y_train_iris, values=values)
         inferred_test = attack.infer(x_test_for_attack, y_test_iris, values=values)
         # check accuracy
-        train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
-        test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
+        train_acc = np.sum(inferred_train == x_train_feature) / len(inferred_train)
+        test_acc = np.sum(inferred_test == x_test_feature) / len(inferred_test)
         assert 0.1 <= train_acc
         assert 0.1 <= test_acc
