Trusted-AI
diff --git a/‎art/attacks/inference/attribute_inference/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎art/attacks/inference/attribute_inference/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎art/attacks/inference/attribute_inference/baseline.py‎
Lines changed: 159 additions & 0 deletions b/‎art/attacks/inference/attribute_inference/baseline.py‎
Lines changed: 159 additions & 0 deletions
diff --git a/‎notebooks/attack_attribute_inference.ipynb‎
Lines changed: 59 additions & 18 deletions b/‎notebooks/attack_attribute_inference.ipynb‎
Lines changed: 59 additions & 18 deletions
@@ -2,6 +2,7 @@
 Module providing attribute inference attacks.
 """
 from art.attacks.inference.attribute_inference.black_box import AttributeInferenceBlackBox
+from art.attacks.inference.attribute_inference.baseline import AttributeInferenceBaseline
 from art.attacks.inference.attribute_inference.white_box_decision_tree import AttributeInferenceWhiteBoxDecisionTree
 from art.attacks.inference.attribute_inference.white_box_lifestyle_decision_tree import (
     AttributeInferenceWhiteBoxLifestyleDecisionTree,
 
@@ -0,0 +1,159 @@
+# MIT License
+#
+# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2021
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""
+This module implements a baseline attribute inference attack that does not use the attacked model.
+"""
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import logging
+from typing import Optional, Union, TYPE_CHECKING
+
+import numpy as np
+from sklearn.neural_network import MLPClassifier
+
+from art.estimators.estimator import BaseEstimator
+from art.estimators.classification.classifier import ClassifierMixin
+from art.attacks.attack import AttributeInferenceAttack
+from art.utils import check_and_transform_label_format, float_to_categorical, floats_to_one_hot
+
+if TYPE_CHECKING:
+    from art.utils import CLASSIFIER_TYPE
+
+logger = logging.getLogger(__name__)
+
+
+class AttributeInferenceBaseline(AttributeInferenceAttack):
+    """
+    Implementation of a baseline attribute inference, not using a model.
+
+    The idea is to train a simple neural network to learn the attacked feature from the rest of the features. Should
+    be used to compare with other attribute inference results.
+    """
+    _estimator_requirements = ()
+
+    def __init__(
+        self,
+        attack_model: Optional["CLASSIFIER_TYPE"] = None,
+        attack_feature: Union[int, slice] = 0,
+    ):
+        """
+        Create an AttributeInferenceBaseline attack instance.
+
+        :param attack_model: The attack model to train, optional. If none is provided, a default model will be created.
+        :param attack_feature: The index of the feature to be attacked or a slice representing multiple indexes in
+                               case of a one-hot encoded feature.
+        """
+        super().__init__(estimator=None, attack_feature=attack_feature)
+
+        if isinstance(self.attack_feature, int):
+            self.single_index_feature = True
+        else:
+            self.single_index_feature = False
+
+        if attack_model:
+            if ClassifierMixin not in type(attack_model).__mro__:
+                raise ValueError("Attack model must be of type Classifier.")
+            self.attack_model = attack_model
+        else:
+            self.attack_model = MLPClassifier(
+                hidden_layer_sizes=(100,),
+                activation="relu",
+                solver="adam",
+                alpha=0.0001,
+                batch_size="auto",
+                learning_rate="constant",
+                learning_rate_init=0.001,
+                power_t=0.5,
+                max_iter=2000,
+                shuffle=True,
+                random_state=None,
+                tol=0.0001,
+                verbose=False,
+                warm_start=False,
+                momentum=0.9,
+                nesterovs_momentum=True,
+                early_stopping=False,
+                validation_fraction=0.1,
+                beta_1=0.9,
+                beta_2=0.999,
+                epsilon=1e-08,
+                n_iter_no_change=10,
+                max_fun=15000,
+            )
+        self._check_params()
+
+    def fit(self, x: np.ndarray) -> None:
+        """
+        Train the attack model.
+
+        :param x: Input to training process. Includes all features used to train the original model.
+        """
+
+        # Checks:
+        if self.single_index_feature and self.attack_feature >= x.shape[1]:
+            raise ValueError("attack_feature must be a valid index to a feature in x")
+
+        # get vector of attacked feature
+        y = x[:, self.attack_feature]
+        if self.single_index_feature:
+            y_one_hot = float_to_categorical(y)
+        else:
+            y_one_hot = floats_to_one_hot(y)
+        y_ready = check_and_transform_label_format(y_one_hot, len(np.unique(y)), return_one_hot=True)
+
+        # create training set for attack model
+        x_train = np.delete(x, self.attack_feature, 1).astype(np.float32)
+
+        # train attack model
+        self.attack_model.fit(x_train, y_ready)
+
+    def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
+        """
+        Infer the attacked feature.
+
+        :param x: Input to attack. Includes all features except the attacked feature.
+        :param y: Not used in this attack.
+        :param values: Possible values for attacked feature. Only needed in case of categorical feature (not one-hot).
+        :type values: `np.ndarray`
+        :return: The inferred feature values.
+        """
+        x_test = x.astype(np.float32)
+
+        if self.single_index_feature:
+            if "values" not in kwargs.keys():
+                raise ValueError("Missing parameter `values`.")
+            values: np.ndarray = kwargs.get("values")
+            return np.array([values[np.argmax(arr)] for arr in self.attack_model.predict(x_test)])
+        else:
+            if "values" in kwargs.keys():
+                values = kwargs.get("values")
+                predictions = self.attack_model.predict(x_test).astype(np.float32)
+                i = 0
+                for column in predictions.T:
+                    for index in range(len(values[i])):
+                        np.place(column, [column == index], values[i][index])
+                    i += 1
+                return np.array(predictions)
+            else:
+                return np.array(self.attack_model.predict(x_test))
+
+    def _check_params(self) -> None:
+        if not isinstance(self.attack_feature, int) and not isinstance(self.attack_feature, slice):
+            raise ValueError("Attack feature must be either an integer or a slice object.")
+        if isinstance(self.attack_feature, int) and self.attack_feature < 0:
+            raise ValueError("Attack feature index must be positive.")
@@ -35,7 +35,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -57,14 +57,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Base model accuracy:  0.9489628557645924\n"
+      "Base model accuracy:  0.9552339604438013\n"
      ]
     }
    ],
@@ -91,12 +91,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
     "import numpy as np\n",
-    "from art.attacks.inference import AttributeInferenceBlackBox\n",
+    "from art.attacks.inference.attribute_inference import AttributeInferenceBlackBox\n",
     "\n",
     "attack_feature = 1  # social\n",
     "\n",
@@ -123,14 +123,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0.7055191045928213\n"
+      "0.6981860285604014\n"
      ]
     }
    ],
@@ -161,19 +161,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0.6526437668853724\n"
+      "0.6522578155152451\n"
      ]
     }
    ],
    "source": [
-    "from art.attacks.inference import AttributeInferenceWhiteBoxLifestyleDecisionTree\n",
+    "from art.attacks.inference.attribute_inference import AttributeInferenceWhiteBoxLifestyleDecisionTree\n",
     "\n",
     "wb_attack = AttributeInferenceWhiteBoxLifestyleDecisionTree(art_classifier, attack_feature=attack_feature)\n",
     "\n",
@@ -196,19 +196,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0.7124662292551138\n"
+      "0.713624083365496\n"
      ]
     }
    ],
    "source": [
-    "from art.attacks.inference import AttributeInferenceWhiteBoxDecisionTree\n",
+    "from art.attacks.inference.attribute_inference import AttributeInferenceWhiteBoxDecisionTree\n",
     "\n",
     "wb2_attack = AttributeInferenceWhiteBoxDecisionTree(art_classifier, attack_feature=attack_feature)\n",
     "\n",
@@ -231,16 +231,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "(0.7638888888888888, 0.13110846245530394)\n",
-      "(0.3849056603773585, 0.12157330154946365)\n",
-      "(0.6666666666666666, 0.22407628128724671)\n"
+      "(0.654054054054054, 0.14421930870083433)\n",
+      "(0.3892857142857143, 0.1299165673420739)\n",
+      "(0.6644067796610169, 0.23361144219308702)\n"
      ]
     }
    ],
@@ -276,6 +276,47 @@
     "# white-box 2\n",
     "print(calc_precision_recall(inferred_train_wb2, np.around(x_train_feature, decimals=8), positive_value=1.41404987))"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "To verify the significance of these results, we now run a baseline attack that uses only the remaining features to try to predict the value of the attacked feature, with no use of the model itself."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.6761868004631416\n"
+     ]
+    }
+   ],
+   "source": [
+    "from art.attacks.inference.attribute_inference import AttributeInferenceBaseline\n",
+    "\n",
+    "baseline_attack = AttributeInferenceBaseline(attack_feature=attack_feature)\n",
+    "\n",
+    "# train attack model\n",
+    "baseline_attack.fit(x_test)\n",
+    "# infer values\n",
+    "inferred_train_baseline = baseline_attack.infer(x_train_for_attack, values=values)\n",
+    "# check accuracy\n",
+    "baseline_train_acc = np.sum(inferred_train_baseline == np.around(x_train_feature, decimals=8).reshape(1,-1)) / len(inferred_train_baseline)\n",
+    "print(baseline_train_acc)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can see that both the black-box attack and the second white-box attack do slightly better than the baseline."
+   ]
   }
  ],
  "metadata": {
@@ -294,7 +335,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.1"
+   "version": "3.8.3"
   }
  },
  "nbformat": 4,