Merge pull request #2384 from abigailgold/dev_1.18.0_scaling

beat-buesser · web-flow · commit 31fafff43bc5 · 2024-04-04T09:10:42.000+02:00
Support scaling input features to attacks
diff --git a/art/attacks/inference/attribute_inference/black_box.py b/art/attacks/inference/attribute_inference/black_box.py
@@ -32,6 +32,7 @@
 from sklearn.svm import SVC, SVR
 from sklearn.preprocessing import minmax_scale, OneHotEncoder, OrdinalEncoder
 from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
 
 from art.estimators.estimator import BaseEstimator
 from art.estimators.classification.classifier import ClassifierMixin
@@ -80,6 +81,7 @@ def __init__(
         is_continuous: Optional[bool] = False,
         scale_range: Optional[Tuple[float, float]] = None,
         prediction_normal_factor: Optional[float] = 1,
+        scaler_type: Optional[str] = "standard",
         non_numerical_features: Optional[List[int]] = None,
         encoder: Optional[Union[OrdinalEncoder, OneHotEncoder, ColumnTransformer]] = None,
         nn_model_epochs: int = 100,
@@ -109,7 +111,11 @@ def __init__(
                             Only applicable when `estimator` is a regressor.
         :param prediction_normal_factor: If supplied, the class labels (both true and predicted) are multiplied by the
                                          factor when used as inputs to the attack-model. Only applicable when
-                                         `estimator` is a regressor and if `scale_range` is not supplied
+                                         `estimator` is a regressor and if `scale_range` is not supplied.
+        :param scaler_type: The type of scaling to apply to all input features to the attack. Can be one of: "standard",
+                            "minmax", "robust" or None. If not None, the appropriate scaler from scikit-learn will be
+                            applied. If None, no scaling will be applied. This is in addition to any specific scaling
+                            performed on the class labels based on the params scale_range or prediction_normal_factor.
         :param non_numerical_features: a list of feature indexes that require encoding in order to feed into an ML model
                                        (i.e., strings), not including the attacked feature. Should only be supplied if
                                        non-numeric features exist in the input data not including the attacked feature,
@@ -130,6 +136,8 @@ def __init__(
         self.attack_model: Optional[Any] = None
         self.prediction_normal_factor = prediction_normal_factor
         self.scale_range = scale_range
+        self.scaler_type = scaler_type
+        self.scaler: Optional[Any] = None
         self.epochs = nn_model_epochs
         self.batch_size = nn_model_batch_size
         self.learning_rate = nn_model_learning_rate
@@ -252,6 +260,19 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
         if y is not None:
             x_train = np.concatenate((x_train, y), axis=1)
 
+        if self.scaler_type:
+            if self.scaler_type == "standard":
+                self.scaler = StandardScaler()
+            elif self.scaler_type == "minmax":
+                self.scaler = MinMaxScaler()
+            elif self.scaler_type == "robust":
+                self.scaler = RobustScaler()
+            else:
+                raise ValueError("Illegal scaler_type: ", self.scaler_type)
+        if self.scaler:
+            self.scaler.fit(x_train)
+            x_train = self.scaler.transform(x_train)
+
         # train attack model
         if self._attack_model_type == "nn":
             import torch
@@ -407,6 +428,9 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
         if y is not None:
             x_test = np.concatenate((x_test, y), axis=1)
 
+        if self.scaler:
+            x_test = self.scaler.transform(x_test)
+
         if self._attack_model_type == "nn":
             from torch.utils.data import DataLoader
             from art.utils import to_cuda, from_cuda
diff --git a/art/attacks/inference/membership_inference/black_box.py b/art/attacks/inference/membership_inference/black_box.py
@@ -31,6 +31,7 @@
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.svm import SVC
+from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
 
 from art.attacks.attack import MembershipInferenceAttack
 from art.estimators.estimator import BaseEstimator
@@ -56,6 +57,10 @@ class MembershipInferenceBlackBox(MembershipInferenceAttack):
         "input_type",
         "attack_model_type",
         "attack_model",
+        "scaler_type",
+        "nn_model_epochs",
+        "nn_model_batch_size",
+        "nn_model_learning_rate",
     ]
     _estimator_requirements = (BaseEstimator, (ClassifierMixin, RegressorMixin))
 
@@ -65,6 +70,7 @@ def __init__(
         input_type: str = "prediction",
         attack_model_type: str = "nn",
         attack_model: Optional[Any] = None,
+        scaler_type: Optional[str] = "standard",
         nn_model_epochs: int = 100,
         nn_model_batch_size: int = 100,
         nn_model_learning_rate: float = 0.0001,
@@ -73,6 +79,9 @@ def __init__(
         Create a MembershipInferenceBlackBox attack instance.
 
         :param estimator: Target estimator.
+        :param input_type: the type of input to train the attack on. Can be one of: 'prediction' or 'loss'. Default is
+                           `prediction`. Predictions can be either probabilities or logits, depending on the return type
+                           of the model. If the model is a regressor, only `loss` can be used.
         :param attack_model_type: the type of default attack model to train, optional. Should be one of:
                                  `nn` (neural network, default),
                                  `rf` (random forest),
@@ -82,10 +91,10 @@ def __init__(
                                  `knn` (k nearest neighbors),
                                  `svm` (support vector machine).
                                  If `attack_model` is supplied, this option will be ignored.
-        :param input_type: the type of input to train the attack on. Can be one of: 'prediction' or 'loss'. Default is
-                           `prediction`. Predictions can be either probabilities or logits, depending on the return type
-                           of the model. If the model is a regressor, only `loss` can be used.
         :param attack_model: The attack model to train, optional. If none is provided, a default model will be created.
+        :param scaler_type: The type of scaling to apply to the input features to the attack. Can be one of: "standard",
+                            "minmax", "robust" or None. If not None, the appropriate scaler from scikit-learn will be
+                            applied. If None, no scaling will be applied.
         :param nn_model_epochs: the number of epochs to use when training a nn attack model
         :param nn_model_batch_size: the batch size to use when training a nn attack model
         :param nn_model_learning_rate: the learning rate to use when training a nn attack model
@@ -95,6 +104,8 @@ def __init__(
         self.input_type = input_type
         self.attack_model_type = attack_model_type
         self.attack_model = attack_model
+        self.scaler_type = scaler_type
+        self.scaler: Optional[Any] = None
         self.epochs = nn_model_epochs
         self.batch_size = nn_model_batch_size
         self.learning_rate = nn_model_learning_rate
@@ -245,13 +256,27 @@ def fit(  # pylint: disable=W0613
         if x_2 is None:
             self.use_label = False
 
+        if self.scaler_type:
+            if self.scaler_type == "standard":
+                self.scaler = StandardScaler()
+            elif self.scaler_type == "minmax":
+                self.scaler = MinMaxScaler()
+            elif self.scaler_type == "robust":
+                self.scaler = RobustScaler()
+            else:
+                raise ValueError("Illegal scaler_type: ", self.scaler_type)
+
         if self.default_model and self.attack_model_type == "nn":
             import torch
             from torch import nn
             from torch import optim
             from torch.utils.data import DataLoader
             from art.utils import to_cuda
 
+            if self.scaler:
+                self.scaler.fit(x_1)
+                x_1 = self.scaler.transform(x_1)
+
             if x_2 is not None:
 
                 class MembershipInferenceAttackModel(nn.Module):
@@ -393,8 +418,15 @@ def forward(self, x_1):
         else:  # not nn
             y_ready = check_and_transform_label_format(y_new, nb_classes=2, return_one_hot=False)
             if x_2 is not None:
-                self.attack_model.fit(np.c_[x_1, x_2], y_ready.ravel())  # type: ignore
+                x = np.c_[x_1, x_2]
+                if self.scaler:
+                    self.scaler.fit(x)
+                    x = self.scaler.transform(x)
+                self.attack_model.fit(x, y_ready.ravel())  # type: ignore
             else:
+                if self.scaler:
+                    self.scaler.fit(x_1)
+                    x_1 = self.scaler.transform(x_1)
                 self.attack_model.fit(x_1, y_ready.ravel())  # type: ignore
 
     def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
@@ -467,6 +499,9 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
             from torch.utils.data import DataLoader
             from art.utils import to_cuda, from_cuda
 
+            if self.scaler:
+                features = self.scaler.transform(features)
+
             self.attack_model.eval()  # type: ignore
             predictions: Optional[np.ndarray] = None
 
@@ -512,17 +547,27 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
         elif not self.default_model:
             # assumes the predict method of the supplied model returns probabilities
             if y is not None and self.use_label:
-                inferred = self.attack_model.predict(np.c_[features, y])  # type: ignore
+                features = np.c_[features, y]
+                if self.scaler:
+                    features = self.scaler.transform(features)
+                inferred = self.attack_model.predict(features)  # type: ignore
             else:
+                if self.scaler:
+                    features = self.scaler.transform(features)
                 inferred = self.attack_model.predict(features)  # type: ignore
             if probabilities:
                 inferred_return = inferred
             else:
                 inferred_return = np.round(inferred)
         else:
             if y is not None and self.use_label:
-                inferred = self.attack_model.predict_proba(np.c_[features, y])  # type: ignore
+                features = np.c_[features, y]
+                if self.scaler:
+                    features = self.scaler.transform(features)
+                inferred = self.attack_model.predict_proba(features)  # type: ignore
             else:
+                if self.scaler:
+                    features = self.scaler.transform(features)
                 inferred = self.attack_model.predict_proba(features)  # type: ignore
             if probabilities:
                 inferred_return = inferred[:, [1]]
diff --git a/tests/attacks/inference/attribute_inference/test_black_box.py b/tests/attacks/inference/attribute_inference/test_black_box.py
@@ -94,6 +94,107 @@ def transform_feature(x):
         art_warning(e)
 
 
+@pytest.mark.skip_framework("dl_frameworks")
+@pytest.mark.parametrize("scaler_type", ["standard", "robust", "minmax"])  # every non-None option fit() accepts
+def test_black_box_scalers(art_warning, scaler_type, decision_tree_estimator, get_iris_dataset):
+    try:
+        attack_feature = 2  # petal length
+
+        # bucket the attacked feature, in place, into the three categories listed in `values` below
+        def transform_feature(x):
+            x[x > 0.5] = 2.0
+            x[(x > 0.2) & (x <= 0.5)] = 1.0
+            x[x <= 0.2] = 0.0
+
+        values = [0.0, 1.0, 2.0]
+
+        (x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset
+        # training data with the attacked feature column removed
+        x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
+        # attacked feature only; copied so the in-place bucketing leaves x_train_iris untouched
+        x_train_feature = x_train_iris[:, attack_feature].copy().reshape(-1, 1)
+        transform_feature(x_train_feature)
+        # training data with the bucketed attacked feature re-inserted at its original column index
+        x_train = np.concatenate((x_train_for_attack[:, :attack_feature], x_train_feature), axis=1)
+        x_train = np.concatenate((x_train, x_train_for_attack[:, attack_feature:]), axis=1)
+
+        # test data with the attacked feature column removed
+        x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
+        # attacked feature only; copied so the in-place bucketing leaves x_test_iris untouched
+        x_test_feature = x_test_iris[:, attack_feature].copy().reshape(-1, 1)
+        transform_feature(x_test_feature)
+
+        classifier = decision_tree_estimator()
+
+        attack = AttributeInferenceBlackBox(classifier, attack_feature=attack_feature, scaler_type=scaler_type)
+        # original model's predictions on the unmodified data, reduced to one argmax index per sample
+        x_train_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_train_iris)]).reshape(-1, 1)
+        x_test_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_test_iris)]).reshape(-1, 1)
+        # train attack model; fit() also fits the selected scaler on its training inputs
+        attack.fit(x_train)
+        # infer attacked feature; infer() reuses the scaler fitted in fit() and does not refit it
+        inferred_train = attack.infer(x_train_for_attack, pred=x_train_predictions, values=values)
+        inferred_test = attack.infer(x_test_for_attack, pred=x_test_predictions, values=values)
+        # check accuracy; abs=0.3 makes these loose sanity bounds rather than exact expectations
+        train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
+        test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
+        assert pytest.approx(0.8285, abs=0.3) == train_acc
+        assert pytest.approx(0.8888, abs=0.3) == test_acc
+
+    except ARTTestException as e:
+        art_warning(e)
+
+
+@pytest.mark.skip_framework("dl_frameworks")
+def test_black_box_tabular_no_scaler(art_warning, decision_tree_estimator, get_iris_dataset):
+    try:
+        attack_feature = 2  # petal length
+
+        # bucket the attacked feature, in place, into the three categories listed in `values` below
+        def transform_feature(x):
+            x[x > 0.5] = 2.0
+            x[(x > 0.2) & (x <= 0.5)] = 1.0
+            x[x <= 0.2] = 0.0
+
+        values = [0.0, 1.0, 2.0]
+
+        (x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset
+        # training data with the attacked feature column removed
+        x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
+        # attacked feature only; copied so the in-place bucketing leaves x_train_iris untouched
+        x_train_feature = x_train_iris[:, attack_feature].copy().reshape(-1, 1)
+        transform_feature(x_train_feature)
+        # training data with the bucketed attacked feature re-inserted at its original column index
+        x_train = np.concatenate((x_train_for_attack[:, :attack_feature], x_train_feature), axis=1)
+        x_train = np.concatenate((x_train, x_train_for_attack[:, attack_feature:]), axis=1)
+
+        # test data with the attacked feature column removed
+        x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
+        # attacked feature only; copied so the in-place bucketing leaves x_test_iris untouched
+        x_test_feature = x_test_iris[:, attack_feature].copy().reshape(-1, 1)
+        transform_feature(x_test_feature)
+
+        classifier = decision_tree_estimator()
+
+        attack = AttributeInferenceBlackBox(classifier, attack_feature=attack_feature, scaler_type=None)
+        # original model's predictions on the unmodified data, reduced to one argmax index per sample
+        x_train_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_train_iris)]).reshape(-1, 1)
+        x_test_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_test_iris)]).reshape(-1, 1)
+        # train attack model; with scaler_type=None no scaler is created, so features are used unscaled
+        attack.fit(x_train)
+        # infer attacked feature
+        inferred_train = attack.infer(x_train_for_attack, pred=x_train_predictions, values=values)
+        inferred_test = attack.infer(x_test_for_attack, pred=x_test_predictions, values=values)
+        # check accuracy; abs=0.3 makes these loose sanity bounds rather than exact expectations
+        train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
+        test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
+        assert pytest.approx(0.8285, abs=0.3) == train_acc
+        assert pytest.approx(0.8888, abs=0.3) == test_acc
+
+    except ARTTestException as e:
+        art_warning(e)
+
+
 @pytest.mark.skip_framework("dl_frameworks")
 @pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
 def test_black_box_continuous(art_warning, decision_tree_estimator, get_iris_dataset, model_type):
diff --git a/tests/attacks/inference/membership_inference/test_black_box.py b/tests/attacks/inference/membership_inference/test_black_box.py
@@ -57,6 +57,25 @@ def test_black_box_tabular(art_warning, model_type, decision_tree_estimator, get
         art_warning(e)
 
 
+@pytest.mark.parametrize("scaler_type", ["standard", "robust", "minmax"])  # every non-None option fit() accepts
+def test_black_box_tabular_scalers(art_warning, scaler_type, decision_tree_estimator, get_iris_dataset):
+    try:
+        classifier = decision_tree_estimator()
+        attack = MembershipInferenceBlackBox(classifier, scaler_type=scaler_type)  # default "nn" attack model
+        backend_check_membership_accuracy(attack, get_iris_dataset, attack_train_ratio, 0.25)
+    except ARTTestException as e:
+        art_warning(e)
+
+
+def test_black_box_tabular_no_scaler(art_warning, decision_tree_estimator, get_iris_dataset):
+    try:
+        classifier = decision_tree_estimator()
+        attack = MembershipInferenceBlackBox(classifier, scaler_type=None)  # None: features are not scaled
+        backend_check_membership_accuracy(attack, get_iris_dataset, attack_train_ratio, 0.25)
+    except ARTTestException as e:
+        art_warning(e)
+
+
 @pytest.mark.parametrize("model_type", ["nn", "rf", "gb", "lr", "dt", "knn", "svm"])
 def test_black_box_tabular_no_label(art_warning, model_type, decision_tree_estimator, get_iris_dataset):
     try: