Merge pull request #1645 from Trusted-AI/dev_1.10.1

beat-buesser · web-flow · commit 030dc49ee439 · 2022-04-22T11:48:25.000+01:00
Update to ART 1.10.1
diff --git a/.github/workflows/ci-style-checks.yml b/.github/workflows/ci-style-checks.yml
@@ -41,9 +41,9 @@ jobs:
           pip install pluggy==0.13.1
           pip install tensorflow==2.7.0
           pip install keras==2.7.0
-          python -m pip install types-six
-          python -m pip install types-PyYAML
-          python3 -m pip install types-setuptools
+          pip install types-six
+          pip install types-PyYAML
+          pip install types-setuptools
           pip install click==8.0.2
           pip list
       - name: pycodestyle
diff --git a/art/attacks/evasion/adversarial_patch/adversarial_patch_pytorch.py b/art/attacks/evasion/adversarial_patch/adversarial_patch_pytorch.py
@@ -683,7 +683,10 @@ def apply_patch(
             mask = mask.copy()
         mask = self._check_mask(mask=mask, x=x)
         x_tensor = torch.Tensor(x)
-        mask_tensor = torch.Tensor(mask)
+        if mask is not None:
+            mask_tensor = torch.Tensor(mask)
+        else:
+            mask_tensor = None
         if isinstance(patch_external, np.ndarray):
             patch_tensor = torch.Tensor(patch_external)
         else:
diff --git a/art/attacks/inference/attribute_inference/black_box.py b/art/attacks/inference/attribute_inference/black_box.py
@@ -153,6 +153,8 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
         # get model's predictions for x
         if ClassifierMixin in type(self.estimator).__mro__:
             predictions = np.array([np.argmax(arr) for arr in self.estimator.predict(x)]).reshape(-1, 1)
+            if y is not None:
+                y = check_and_transform_label_format(y, return_one_hot=True)
         else:  # Regression model
             if self.scale_range is not None:
                 predictions = minmax_scale(self.estimator.predict(x).reshape(-1, 1), feature_range=self.scale_range)
@@ -162,6 +164,8 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
                 predictions = self.estimator.predict(x).reshape(-1, 1) * self.prediction_normal_factor
                 if y is not None:
                     y = y * self.prediction_normal_factor
+            if y is not None:
+                y = y.reshape(-1, 1)
 
         # get vector of attacked feature
         y_attack = x[:, self.attack_feature]
@@ -176,7 +180,6 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
         x_train = np.concatenate((np.delete(x, self.attack_feature, 1), predictions), axis=1).astype(np.float32)
 
         if y is not None:
-            y = check_and_transform_label_format(y, return_one_hot=True)
             x_train = np.concatenate((x_train, y), axis=1)
 
         # train attack model
@@ -227,11 +230,14 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
                 x_test = np.concatenate((x, pred * self.prediction_normal_factor), axis=1).astype(np.float32)
                 if y is not None:
                     y = y * self.prediction_normal_factor
+            if y is not None:
+                y = y.reshape(-1, 1)
         else:
             x_test = np.concatenate((x, pred), axis=1).astype(np.float32)
+            if y is not None:
+                y = check_and_transform_label_format(y, return_one_hot=True)
 
         if y is not None:
-            y = check_and_transform_label_format(y, return_one_hot=True)
             x_test = np.concatenate((x_test, y), axis=1)
 
         predictions = self.attack_model.predict(x_test).astype(np.float32)
diff --git a/art/attacks/inference/attribute_inference/true_label_baseline.py b/art/attacks/inference/attribute_inference/true_label_baseline.py
@@ -59,6 +59,7 @@ def __init__(
         attack_model_type: str = "nn",
         attack_model: Optional["CLASSIFIER_TYPE"] = None,
         attack_feature: Union[int, slice] = 0,
+        is_regression: Optional[bool] = False,
         scale_range: Optional[slice] = None,
         prediction_normal_factor: float = 1,
     ):
@@ -72,11 +73,12 @@ def __init__(
         :param attack_feature: The index of the feature to be attacked or a slice representing multiple indexes in
                                case of a one-hot encoded feature.
                                case of a one-hot encoded feature.
+        :param is_regression: Whether the model is a regression model. Default is False (classification).
         :param scale_range: If supplied, the class labels (both true and predicted) will be scaled to the given range.
-                            Only applicable when `estimator` is a regressor.
+                            Only applicable when `is_regression` is True.
         :param prediction_normal_factor: If supplied, the class labels (both true and predicted) are multiplied by the
                                          factor when used as inputs to the attack-model. Only applicable when
-                                         `estimator` is a regressor and if `scale_range` is not supplied.
+                                         `is_regression` is True and if `scale_range` is not supplied.
         """
         super().__init__(estimator=None, attack_feature=attack_feature)
 
@@ -119,6 +121,7 @@ def __init__(
 
         self.prediction_normal_factor = prediction_normal_factor
         self.scale_range = scale_range
+        self.is_regression = is_regression
         self._check_params()
         self.attack_feature = get_feature_index(self.attack_feature)
 
@@ -146,11 +149,14 @@ def fit(self, x: np.ndarray, y: np.ndarray) -> None:
             raise ValueError("None value detected.")
 
         # create training set for attack model
-        if self.scale_range is not None:
-            normalized_labels = minmax_scale(y, feature_range=self.scale_range)
+        if self.is_regression:
+            if self.scale_range is not None:
+                normalized_labels = minmax_scale(y, feature_range=self.scale_range)
+            else:
+                normalized_labels = y * self.prediction_normal_factor
+            normalized_labels = normalized_labels.reshape(-1, 1)
         else:
-            normalized_labels = y * self.prediction_normal_factor
-        normalized_labels = check_and_transform_label_format(normalized_labels, return_one_hot=True)
+            normalized_labels = check_and_transform_label_format(y, return_one_hot=True)
         x_train = np.concatenate((np.delete(x, self.attack_feature, 1), normalized_labels), axis=1).astype(np.float32)
 
         # train attack model
@@ -179,11 +185,14 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
         if values is not None:
             self._values = values
 
-        if self.scale_range is not None:
-            normalized_labels = minmax_scale(y, feature_range=self.scale_range)
+        if self.is_regression:
+            if self.scale_range is not None:
+                normalized_labels = minmax_scale(y, feature_range=self.scale_range)
+            else:
+                normalized_labels = y * self.prediction_normal_factor
+            normalized_labels = normalized_labels.reshape(-1, 1)
         else:
-            normalized_labels = y * self.prediction_normal_factor
-        normalized_labels = check_and_transform_label_format(normalized_labels, return_one_hot=True)
+            normalized_labels = check_and_transform_label_format(y, return_one_hot=True)
         x_test = np.concatenate((x, normalized_labels), axis=1).astype(np.float32)
 
         predictions = self.attack_model.predict(x_test).astype(np.float32)
diff --git a/art/attacks/poisoning/gradient_matching_attack.py b/art/attacks/poisoning/gradient_matching_attack.py
@@ -151,8 +151,8 @@ def __initialize_poison_tensorflow(
 
         :param x_trigger: A list of samples to use as triggers.
         :param y_trigger: A list of target classes to classify the triggers into.
-        :param x_train: A list of training data to poison a portion of.
-        :param y_train: A list of labels for x_train.
+        :param x_poison: A list of training data to poison a portion of.
+        :param y_poison: A list of true labels for x_poison.
         """
         # pylint: disable=no-name-in-module
         from tensorflow.keras import backend as K
@@ -190,7 +190,7 @@ def _weight_grad(classifier: TensorFlowV2Classifier, x: tf.Tensor, target: tf.Te
         y_true_poison = Input(shape=np.shape(y_poison)[1:])
         embedding_layer = Embedding(
             len(x_poison),
-            np.prod(input_poison.shape[1:]),
+            np.prod(x_poison.shape[1:]),
             embeddings_initializer=tf.keras.initializers.RandomNormal(stddev=self.epsilon * 0.01),
         )
         embeddings = embedding_layer(input_indices)
diff --git a/art/defences/trainer/adversarial_trainer_madry_pgd.py b/art/defences/trainer/adversarial_trainer_madry_pgd.py
@@ -54,8 +54,8 @@ class AdversarialTrainerMadryPGD(Trainer):
     def __init__(
         self,
         classifier: "CLASSIFIER_LOSS_GRADIENTS_TYPE",
-        nb_epochs: int = 391,
-        batch_size: int = 128,
+        nb_epochs: Optional[int] = 391,
+        batch_size: Optional[int] = 128,
         eps: Union[int, float] = 8,
         eps_step: Union[int, float] = 2,
         max_iter: int = 7,
@@ -91,18 +91,41 @@ def __init__(
         self.trainer = AdversarialTrainer(classifier, self.attack, ratio=1.0)  # type: ignore
 
     def fit(  # pylint: disable=W0221
-        self, x: np.ndarray, y: np.ndarray, validation_data: Optional[np.ndarray] = None, **kwargs
+        self,
+        x: np.ndarray,
+        y: np.ndarray,
+        validation_data: Optional[np.ndarray] = None,
+        batch_size: Optional[int] = None,
+        nb_epochs: Optional[int] = None,
+        **kwargs
     ) -> None:
         """
         Train a model adversarially. See class documentation for more information on the exact procedure.
 
         :param x: Training data.
         :param y: Labels for the training data.
         :param validation_data: Validation data.
+        :param batch_size: Size of batches. Overrides batch_size defined in __init__ if not None.
+        :param nb_epochs: Number of epochs to use for training. Overrides nb_epochs defined in __init__ if not None.
         :param kwargs: Dictionary of framework-specific arguments.
         """
+        batch_size_fit: int
+        if batch_size is not None:
+            batch_size_fit = batch_size
+        elif self.batch_size is not None:
+            batch_size_fit = self.batch_size
+        else:
+            raise ValueError("Please provide value for `batch_size`.")
+
+        if nb_epochs is not None:
+            nb_epochs_fit: int = nb_epochs
+        elif self.nb_epochs is not None:
+            nb_epochs_fit = self.nb_epochs
+        else:
+            raise ValueError("Please provide value for `nb_epochs`.")
+
         self.trainer.fit(
-            x, y, validation_data=validation_data, nb_epochs=self.nb_epochs, batch_size=self.batch_size, **kwargs
+            x, y, validation_data=validation_data, nb_epochs=nb_epochs_fit, batch_size=batch_size_fit, **kwargs
         )
 
     def get_classifier(self) -> "CLASSIFIER_LOSS_GRADIENTS_TYPE":
diff --git a/art/estimators/object_detection/python_object_detector.py b/art/estimators/object_detection/python_object_detector.py
@@ -74,7 +74,7 @@ def __init__(
                maximum values allowed for features. If floats are provided, these will be used as the range of all
                features. If arrays are provided, each value will be considered the bound for a feature, thus
                the shape of clip values needs to match the total number of features.
-        :param channels_first: Set channels first or last.
+        :param channels_first: [Currently unused] Set channels first or last.
         :param preprocessing_defences: Preprocessing defence(s) to be applied by the classifier.
         :param postprocessing_defences: Postprocessing defence(s) to be applied by the classifier.
         :param preprocessing: Tuple of the form `(subtrahend, divisor)` of floats or `np.ndarray` of values to be
@@ -214,7 +214,7 @@ def _get_losses(
                     x_grad.requires_grad = True
                 else:
                     x_grad = x[i].to(self.device)
-                    if x_grad.shape[-1] in [1, 3]:
+                    if x_grad.shape[2] < x_grad.shape[0] and x_grad.shape[2] < x_grad.shape[1]:
                         x_grad = torch.permute(x_grad, (2, 0, 1))
 
                 image_tensor_list_grad.append(x_grad)
diff --git a/art/estimators/object_detection/pytorch_faster_rcnn.py b/art/estimators/object_detection/pytorch_faster_rcnn.py
@@ -70,7 +70,7 @@ def __init__(
                maximum values allowed for features. If floats are provided, these will be used as the range of all
                features. If arrays are provided, each value will be considered the bound for a feature, thus
                the shape of clip values needs to match the total number of features.
-        :param channels_first: Set channels first or last.
+        :param channels_first: [Currently unused] Set channels first or last.
         :param preprocessing_defences: Preprocessing defence(s) to be applied by the classifier.
         :param postprocessing_defences: Postprocessing defence(s) to be applied by the classifier.
         :param preprocessing: Tuple of the form `(subtrahend, divisor)` of floats or `np.ndarray` of values to be
diff --git a/tests/attacks/inference/attribute_inference/test_black_box.py b/tests/attacks/inference/attribute_inference/test_black_box.py
@@ -314,6 +314,77 @@ def transform_feature(x):
         art_warning(e)
 
 
+@pytest.mark.skip_framework("dl_frameworks")
+@pytest.mark.parametrize("model_type", ["nn", "rf"])
+def test_black_box_regressor_label(art_warning, get_diabetes_dataset, model_type):
+    try:
+        attack_feature = 0  # age
+
+        bins = [
+            -0.96838121,
+            -0.77154309,
+            -0.57470497,
+            -0.37786684,
+            -0.18102872,
+            0.0158094,
+            0.21264752,
+            0.40948564,
+            0.60632376,
+            0.80316188,
+            1.0,
+        ]
+
+        # need to transform attacked feature into categorical
+        def transform_feature(x):
+            for i in range(len(bins) - 1):
+                x[(x >= bins[i]) & (x <= bins[i + 1])] = i
+
+        values = list(range(len(bins) - 1))
+
+        (x_train_diabetes, y_train_diabetes), (x_test_diabetes, y_test_diabetes) = get_diabetes_dataset
+        # training data without attacked feature
+        x_train_for_attack = np.delete(x_train_diabetes, attack_feature, 1)
+        # only attacked feature
+        x_train_feature = x_train_diabetes[:, attack_feature].copy().reshape(-1, 1)
+        transform_feature(x_train_feature)
+        # training data with attacked feature (after transformation)
+        x_train = np.concatenate((x_train_for_attack[:, :attack_feature], x_train_feature), axis=1)
+        x_train = np.concatenate((x_train, x_train_for_attack[:, attack_feature:]), axis=1)
+
+        # test data without attacked feature
+        x_test_for_attack = np.delete(x_test_diabetes, attack_feature, 1)
+        # only attacked feature
+        x_test_feature = x_test_diabetes[:, attack_feature].copy().reshape(-1, 1)
+        transform_feature(x_test_feature)
+
+        from sklearn import linear_model
+
+        regr_model = linear_model.LinearRegression()
+        regr_model.fit(x_train_diabetes, y_train_diabetes)
+        regressor = ScikitlearnRegressor(regr_model)
+
+        attack = AttributeInferenceBlackBox(
+            regressor, attack_feature=attack_feature, prediction_normal_factor=1 / 250, attack_model_type=model_type
+        )
+        # get original model's predictions
+        x_train_predictions = regressor.predict(x_train_diabetes).reshape(-1, 1)
+        x_test_predictions = regressor.predict(x_test_diabetes).reshape(-1, 1)
+        # train attack model
+        attack.fit(x_train, y=y_train_diabetes)
+        # infer attacked feature
+        inferred_train = attack.infer(x_train_for_attack, pred=x_train_predictions, values=values, y=y_train_diabetes)
+        inferred_test = attack.infer(x_test_for_attack, pred=x_test_predictions, values=values, y=y_test_diabetes)
+        # check accuracy
+        train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
+        test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
+
+        assert pytest.approx(0.0258, abs=0.12) == train_acc
+        assert pytest.approx(0.0375, abs=0.12) == test_acc
+
+    except ARTTestException as e:
+        art_warning(e)
+
+
 @pytest.mark.skip_framework("dl_frameworks")
 def test_black_box_with_model(art_warning, decision_tree_estimator, get_iris_dataset):
     try:
diff --git a/tests/attacks/inference/attribute_inference/test_true_label_baseline.py b/tests/attacks/inference/attribute_inference/test_true_label_baseline.py
@@ -183,6 +183,46 @@ def transform_feature(x):
         art_warning(e)
 
 
+@pytest.mark.skip_framework("dl_frameworks")
+@pytest.mark.parametrize("model_type", ["nn", "rf"])
+def test_true_label_baseline_regression(art_warning, get_diabetes_dataset, model_type):
+    try:
+        attack_feature = 1  # sex
+
+        (x_train, y_train), (x_test, y_test) = get_diabetes_dataset
+        # training data without attacked feature
+        x_train_for_attack = np.delete(x_train, attack_feature, 1)
+        # only attacked feature
+        x_train_feature = x_train[:, attack_feature].copy().reshape(-1, 1)
+
+        # test data without attacked feature
+        x_test_for_attack = np.delete(x_test, attack_feature, 1)
+        # only attacked feature
+        x_test_feature = x_test[:, attack_feature].copy().reshape(-1, 1)
+
+        baseline_attack = AttributeInferenceBaselineTrueLabel(
+            attack_feature=attack_feature, attack_model_type=model_type, is_regression=True
+        )
+        # train attack model
+        baseline_attack.fit(x_train, y_train)
+        # infer attacked feature
+        baseline_inferred_train = baseline_attack.infer(x_train_for_attack, y=y_train)
+        baseline_inferred_test = baseline_attack.infer(x_test_for_attack, y=y_test)
+        # check accuracy
+        baseline_train_acc = np.sum(baseline_inferred_train == x_train_feature.reshape(1, -1)) / len(
+            baseline_inferred_train
+        )
+        baseline_test_acc = np.sum(baseline_inferred_test == x_test_feature.reshape(1, -1)) / len(
+            baseline_inferred_test
+        )
+
+        assert 0.6 <= baseline_train_acc
+        assert 0.6 <= baseline_test_acc
+
+    except ARTTestException as e:
+        art_warning(e)
+
+
 def test_check_params(art_warning):
     try:
         with pytest.raises(ValueError):