Trusted-AI
diff --git a/‎art/attacks/attack.py‎
Lines changed: 33 additions & 4 deletions b/‎art/attacks/attack.py‎
Lines changed: 33 additions & 4 deletions
diff --git a/‎art/attacks/inference/attribute_inference/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎art/attacks/inference/attribute_inference/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎art/attacks/inference/attribute_inference/baseline.py‎
Lines changed: 6 additions & 2 deletions b/‎art/attacks/inference/attribute_inference/baseline.py‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎art/attacks/inference/attribute_inference/black_box.py‎
Lines changed: 6 additions & 2 deletions b/‎art/attacks/inference/attribute_inference/black_box.py‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎art/attacks/inference/attribute_inference/meminf_based.py‎
Lines changed: 154 additions & 0 deletions b/‎art/attacks/inference/attribute_inference/meminf_based.py‎
Lines changed: 154 additions & 0 deletions
diff --git a/‎art/attacks/inference/attribute_inference/white_box_decision_tree.py‎
Lines changed: 2 additions & 2 deletions b/‎art/attacks/inference/attribute_inference/white_box_decision_tree.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎art/attacks/inference/attribute_inference/white_box_lifestyle_decision_tree.py‎
Lines changed: 2 additions & 2 deletions b/‎art/attacks/inference/attribute_inference/white_box_lifestyle_decision_tree.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎art/attacks/inference/membership_inference/black_box.py‎
Lines changed: 35 additions & 12 deletions b/‎art/attacks/inference/membership_inference/black_box.py‎
Lines changed: 35 additions & 12 deletions
@@ -329,12 +329,12 @@ def __init__(self, estimator):
     @abc.abstractmethod
     def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
         """
-        Infer sensitive properties (attributes, membership training records) from the targeted estimator. This method
+        Infer sensitive attributes from the targeted estimator. This method
         should be overridden by all concrete inference attack implementations.
 
         :param x: An array with reference inputs to be used in the attack.
         :param y: Labels for `x`. This parameter is only used by some of the attacks.
-        :return: An array holding the inferred properties.
+        :return: An array holding the inferred attribute values.
         """
         raise NotImplementedError
 
@@ -358,12 +358,41 @@ def __init__(self, estimator, attack_feature: Union[int, slice] = 0):
     @abc.abstractmethod
     def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
         """
-        Infer sensitive properties (attributes, membership training records) from the targeted estimator. This method
+        Infer sensitive attributes from the targeted estimator. This method
         should be overridden by all concrete inference attack implementations.
 
         :param x: An array with reference inputs to be used in the attack.
         :param y: Labels for `x`. This parameter is only used by some of the attacks.
-        :return: An array holding the inferred properties.
+        :return: An array holding the inferred attribute values.
+        """
+        raise NotImplementedError
+
+
+class MembershipInferenceAttack(InferenceAttack):
+    """
+    Abstract base class for membership inference attack classes.
+    """
+
+    def __init__(self, estimator: Union["CLASSIFIER_TYPE"]):
+        """
+        :param estimator: A trained estimator targeted for inference attack; it is passed unchanged to the
+                          `InferenceAttack` constructor.
+        :type estimator: :class:`.art.estimators.estimator.BaseEstimator`
+        """
+        super().__init__(estimator)
+
+    @abc.abstractmethod
+    def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
+        """
+        Infer membership status of samples from the target estimator. This method
+        should be overridden by all concrete membership inference attack implementations.
+
+        :param x: An array with reference inputs to be used in the attack.
+        :param y: Labels for `x`. This parameter is only used by some of the attacks.
+        :param probabilities: Optional keyword argument; a boolean indicating whether to return the predicted
+                              probabilities per class, or just the predicted class.
+        :return: An array holding the inferred membership status (1 indicates member of training set,
+                 0 indicates non-member) or class probabilities.
+        """
+        raise NotImplementedError
 
 
@@ -7,3 +7,4 @@
 from art.attacks.inference.attribute_inference.white_box_lifestyle_decision_tree import (
     AttributeInferenceWhiteBoxLifestyleDecisionTree,
 )
+from art.attacks.inference.attribute_inference.meminf_based import AttributeInferenceMembership
@@ -128,8 +128,12 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
 
         :param x: Input to attack. Includes all features except the attacked feature.
         :param y: Not used in this attack.
-        :param values: Possible values for attacked feature. Only needed in case of categorical feature (not one-hot).
-        :type values: `np.ndarray`
+        :param values: Possible values for attacked feature. For a single column feature this should be a simple list
+                       containing all possible values, in increasing order (the smallest value in the 0 index and so
+                       on). For a multi-column feature (for example 1-hot encoded and then scaled), this should be a
+                       list of lists, where each internal list represents a column (in increasing order) and the values
+                       represent the possible values for that column (in increasing order).
+        :type values: list
         :return: The inferred feature values.
         """
         x_test = x.astype(np.float32)
 
@@ -138,8 +138,12 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
 
         :param x: Input to attack. Includes all features except the attacked feature.
         :param y: Original model's predictions for x.
-        :param values: Possible values for attacked feature. Only needed in case of categorical feature (not one-hot).
-        :type values: `np.ndarray`
+        :param values: Possible values for attacked feature. For a single column feature this should be a simple list
+                       containing all possible values, in increasing order (the smallest value in the 0 index and so
+                       on). For a multi-column feature (for example 1-hot encoded and then scaled), this should be a
+                       list of lists, where each internal list represents a column (in increasing order) and the values
+                       represent the possible values for that column (in increasing order).
+        :type values: list
         :return: The inferred feature values.
         """
         if y is None:
 
@@ -0,0 +1,154 @@
+# MIT License
+#
+# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2021
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""
+This module implements attribute inference attacks using membership inference attacks.
+"""
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import logging
+from typing import Optional, Union, List, TYPE_CHECKING
+
+import numpy as np
+
+from art.estimators.estimator import BaseEstimator
+from art.estimators.classification.classifier import ClassifierMixin
+from art.attacks.attack import AttributeInferenceAttack, MembershipInferenceAttack
+from art.exceptions import EstimatorError
+
+if TYPE_CHECKING:
+    from art.utils import CLASSIFIER_TYPE
+
+logger = logging.getLogger(__name__)
+
+
+class AttributeInferenceMembership(AttributeInferenceAttack):
+    """
+    Implementation of an attribute inference attack that utilizes a membership inference attack.
+
+    The idea is to find the target feature value that causes the membership inference attack to classify the sample
+    as a member with the highest confidence.
+    """
+
+    _estimator_requirements = (BaseEstimator, ClassifierMixin)
+
+    def __init__(
+        self,
+        classifier: "CLASSIFIER_TYPE",
+        membership_attack: MembershipInferenceAttack,
+        attack_feature: Union[int, slice] = 0,
+    ):
+        """
+        Create an AttributeInferenceMembership attack instance.
+
+        :param classifier: Target classifier. Must satisfy the estimator requirements of `membership_attack`.
+        :param membership_attack: The membership inference attack to use. Should be fit/calibrated in advance, and
+                                  should support returning probabilities.
+        :param attack_feature: The index of the feature to be attacked or a slice representing multiple indexes in
+                               case of a one-hot encoded feature.
+        """
+        super().__init__(estimator=classifier, attack_feature=attack_feature)
+        if not all(t in type(classifier).__mro__ for t in membership_attack.estimator_requirements):
+            raise EstimatorError(membership_attack, membership_attack.estimator_requirements, classifier)
+
+        self.membership_attack = membership_attack
+        self._check_params()
+
+    def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
+        """
+        Infer the attacked feature.
+
+        :param x: Input to attack. Includes all features except the attacked feature.
+        :param y: The labels expected by the membership attack; passed through to `membership_attack.infer`.
+        :param values: Required keyword argument. Possible values for attacked feature. For a single column feature
+                       this should be a simple list containing all possible values, in increasing order (the
+                       smallest value in the 0 index and so on). For a multi-column feature (for example 1-hot
+                       encoded and then scaled), this should be a list of lists, one per column, where index 0
+                       holds the column's "negative" value and index 1 its "positive" value.
+        :type values: list
+        :return: The inferred feature values.
+        """
+        if self.estimator.input_shape is not None:
+            if isinstance(self.attack_feature, int) and self.estimator.input_shape[0] != x.shape[1] + 1:
+                raise ValueError("Number of features in x + 1 does not match input_shape of classifier")
+
+        if "values" not in kwargs.keys():
+            raise ValueError("Missing parameter `values`.")
+        values: Optional[List] = kwargs.get("values")
+        if not values:
+            raise ValueError("`values` cannot be None or empty")
+
+        if y is not None:
+            if y.shape[0] != x.shape[0]:
+                raise ValueError("Number of rows in x and y do not match")
+
+        # single column feature: score each candidate value inserted as column `attack_feature` of x
+        if isinstance(self.attack_feature, int):
+            first = True
+            for value in values:
+                v_full = np.full((x.shape[0], 1), value).astype(np.float32)
+                x_value = np.concatenate((x[:, : self.attack_feature], v_full), axis=1)
+                x_value = np.concatenate((x_value, x[:, self.attack_feature :]), axis=1)
+
+                predicted = self.membership_attack.infer(x_value, y, probabilities=True)
+                if first:
+                    probabilities = predicted[:, 1].reshape(-1, 1)  # column 1 is used as the "member" score
+                    first = False
+                else:
+                    probabilities = np.hstack((probabilities, predicted[:, 1].reshape(-1, 1)))
+
+            # cast to float so that `pred_values` (created with zeros_like) can hold the actual feature values
+            value_indexes = np.argmax(probabilities, axis=1).astype(np.float32)
+            pred_values = np.zeros_like(value_indexes)
+            for index, value in enumerate(values):
+                pred_values[value_indexes == index] = value
+        else:  # 1-hot encoded feature. Can also be scaled. NOTE(review): relies on the slice having a start.
+            first = True
+            # assumes that the second value is the "positive" value and that there can only be one positive column
+            for index, value in enumerate(values):
+                curr_value = np.zeros((x.shape[0], len(values)))
+                curr_value[:, index] = value[1]
+                for not_index, not_value in enumerate(values):
+                    if not_index != index:
+                        curr_value[:, not_index] = not_value[0]
+                x_value = np.concatenate((x[:, : self.attack_feature.start], curr_value), axis=1)
+                x_value = np.concatenate((x_value, x[:, self.attack_feature.start :]), axis=1)
+
+                predicted = self.membership_attack.infer(x_value, y, probabilities=True)
+                if first:
+                    probabilities = predicted[:, 1].reshape(-1, 1)  # column 1 is used as the "member" score
+                else:
+                    probabilities = np.hstack((probabilities, predicted[:, 1].reshape(-1, 1)))
+                first = False
+            value_indexes = np.argmax(probabilities, axis=1).astype(np.float32)
+            pred_values = np.zeros_like(probabilities)  # one output column per candidate column in `values`
+            for index, value in enumerate(values):
+                curr_value = np.zeros(len(values))
+                curr_value[index] = value[1]
+                for not_index, not_value in enumerate(values):
+                    if not_index != index:
+                        curr_value[not_index] = not_value[0]
+                pred_values[value_indexes == index] = curr_value
+        return pred_values
+
+    def _check_params(self) -> None:  # validates `attack_feature` and `membership_attack`; raises ValueError
+        if not isinstance(self.attack_feature, int) and not isinstance(self.attack_feature, slice):
+            raise ValueError("Attack feature must be either an integer or a slice object.")
+        if isinstance(self.attack_feature, int) and self.attack_feature < 0:  # note: 0 is accepted
+            raise ValueError("Attack feature index must be positive.")
+        if not isinstance(self.membership_attack, MembershipInferenceAttack):
+            raise ValueError("membership_attack should be a sub-class of MembershipInferenceAttack")
@@ -67,9 +67,9 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
         :param x: Input to attack. Includes all features except the attacked feature.
         :param y: Original model's predictions for x.
         :param values: Possible values for attacked feature.
-        :type values: `np.ndarray`
+        :type values: list
         :param priors: Prior distributions of attacked feature values. Same size array as `values`.
-        :type priors: `np.ndarray`
+        :type priors: list
         :return: The inferred feature values.
         """
         if "priors" not in kwargs.keys():
 
@@ -64,9 +64,9 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
         :param x: Input to attack. Includes all features except the attacked feature.
         :param y: Not used.
         :param values: Possible values for attacked feature.
-        :type values: `np.ndarray`
+        :type values: list
         :param priors: Prior distributions of attacked feature values. Same size array as `values`.
-        :type priors: `np.ndarray`
+        :type priors: list
         :return: The inferred feature values.
         :rtype: `np.ndarray`
         """
 
@@ -28,7 +28,7 @@
 import numpy as np
 from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
 
-from art.attacks.attack import InferenceAttack
+from art.attacks.attack import MembershipInferenceAttack
 from art.estimators.estimator import BaseEstimator, NeuralNetworkMixin
 from art.estimators.classification.classifier import ClassifierMixin
 from art.utils import check_and_transform_label_format
@@ -39,15 +39,15 @@
 logger = logging.getLogger(__name__)
 
 
-class MembershipInferenceBlackBox(InferenceAttack):
+class MembershipInferenceBlackBox(MembershipInferenceAttack):
     """
     Implementation of a learned black-box membership inference attack.
 
     This implementation can use as input to the learning process probabilities/logits or losses,
     depending on the type of model and provided configuration.
     """
 
-    attack_params = InferenceAttack.attack_params + [
+    attack_params = MembershipInferenceAttack.attack_params + [
         "input_type",
         "attack_model_type",
         "attack_model",
@@ -231,10 +231,7 @@ def fit(  # pylint: disable=W0613
                     loss.backward()
                     optimizer.step()
         else:
-            if self.attack_model_type == "gb":
-                y_ready = check_and_transform_label_format(y_new, len(np.unique(y_new)), return_one_hot=False)
-            else:
-                y_ready = check_and_transform_label_format(y_new, len(np.unique(y_new)), return_one_hot=True)
+            y_ready = check_and_transform_label_format(y_new, len(np.unique(y_new)), return_one_hot=False)
             self.attack_model.fit(np.c_[x_1, x_2], y_ready)  # type: ignore
 
     def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
@@ -243,7 +240,10 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
 
         :param x: Input records to attack.
         :param y: True labels for `x`.
-        :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member.
+        :param probabilities: a boolean indicating whether to return the predicted probabilities per class, or just
+                              the predicted class
+        :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member,
+                 or class probabilities.
         """
         if y is None:
             raise ValueError("MembershipInferenceBlackBox requires true labels `y`.")
@@ -252,6 +252,11 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
             if self.estimator.input_shape[0] != x.shape[1]:
                 raise ValueError("Shape of x does not match input_shape of classifier")
 
+        if "probabilities" in kwargs.keys():
+            probabilities = kwargs.get("probabilities")
+        else:
+            probabilities = False
+
         y = check_and_transform_label_format(y, len(np.unique(y)), return_one_hot=True)
 
         if y.shape[0] != x.shape[0]:
@@ -274,7 +279,10 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
             for input1, input2, _ in test_loader:
                 input1, input2 = to_cuda(input1), to_cuda(input2)
                 outputs = self.attack_model(input1, input2)  # type: ignore
-                predicted = torch.round(outputs)
+                if not probabilities:
+                    predicted = torch.round(outputs)
+                else:
+                    predicted = outputs
                 predicted = from_cuda(predicted)
 
                 if inferred is None:
@@ -283,12 +291,27 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
                     inferred = np.vstack((inferred, predicted.detach().numpy()))
 
             if inferred is not None:
-                inferred_return = inferred.reshape(-1).astype(np.int)
+                if not probabilities:
+                    inferred_return = inferred.reshape(-1).astype(np.int)
+                else:
+                    inferred = inferred.reshape(-1)
+                    prob_0 = np.ones_like(inferred) - inferred
+                    inferred_return = np.stack((prob_0, inferred), axis=1)
             else:
                 raise ValueError("No data available.")
-        else:
+        elif not self.default_model:
+            # assumes the predict method of the supplied model returns probabilities
             pred = self.attack_model.predict(np.c_[features, y])  # type: ignore
-            inferred_return = np.array([np.argmax(arr) for arr in pred])
+            if probabilities:
+                inferred_return = pred
+            else:
+                inferred_return = np.array([np.argmax(arr) for arr in pred])
+        else:
+            pred = self.attack_model.predict_proba(np.c_[features, y])  # type: ignore
+            if probabilities:
+                inferred_return = pred
+            else:
+                inferred_return = np.array([np.argmax(arr) for arr in pred])
 
         return inferred_return
Original file line number	Diff line number	Diff line change
`@@ -7,3 +7,4 @@`
`7`	`7`	`from art.attacks.inference.attribute_inference.white_box_lifestyle_decision_tree import (`
`8`	`8`	`AttributeInferenceWhiteBoxLifestyleDecisionTree,`
`9`	`9`	`)`
	`10`	`+from art.attacks.inference.attribute_inference.meminf_based import AttributeInferenceMembership`