|
45 | 45 | "\n", |
46 | 46 | "from art.utils import load_nursery\n", |
47 | 47 | "\n", |
48 | | - "(x_train, y_train), (x_test, y_test), _, _ = load_nursery(test_set=0.8, transform_social=True)" |
| 48 | + "(x_train, y_train), (x_test, y_test), _, _ = load_nursery(test_set=0.2, transform_social=True)" |
49 | 49 | ] |
50 | 50 | }, |
51 | 51 | { |
|
64 | 64 | "name": "stdout", |
65 | 65 | "output_type": "stream", |
66 | 66 | "text": [ |
67 | | - "Base model accuracy: 0.9552339604438013\n" |
| 67 | + "Base model accuracy: 0.9791666666666666\n" |
68 | 68 | ] |
69 | 69 | } |
70 | 70 | ], |
|
98 | 98 | "import numpy as np\n", |
99 | 99 | "from art.attacks.inference.attribute_inference import AttributeInferenceBlackBox\n", |
100 | 100 | "\n", |
| 101 | + "attack_train_ratio = 0.5\n", |
| 102 | + "attack_train_size = int(len(x_train) * attack_train_ratio)\n", |
| 103 | + "attack_x_train = x_train[:attack_train_size]\n", |
| 104 | + "attack_y_train = y_train[:attack_train_size]\n", |
| 105 | + "attack_x_test = x_train[attack_train_size:]\n", |
| 106 | + "attack_y_test = y_train[attack_train_size:]\n", |
| 107 | + "\n", |
101 | 108 | "attack_feature = 1 # social\n", |
102 | 109 | "\n", |
| 110 | + "# get original model's predictions\n", |
| 111 | + "attack_x_test_predictions = np.array([np.argmax(arr) for arr in art_classifier.predict(attack_x_test)]).reshape(-1,1)\n", |
103 | 112 | "# only attacked feature\n", |
104 | | - "x_train_feature = x_train[:, attack_feature].copy().reshape(-1, 1)\n", |
| 113 | + "attack_x_test_feature = attack_x_test[:, attack_feature].copy().reshape(-1, 1)\n", |
105 | 114 | "# training data without attacked feature\n", |
106 | | - "x_train_for_attack = np.delete(x_train, attack_feature, 1)\n", |
| 115 | + "attack_x_test = np.delete(attack_x_test, attack_feature, 1)\n", |
107 | 116 | "\n", |
108 | 117 | "bb_attack = AttributeInferenceBlackBox(art_classifier, attack_feature=attack_feature)\n", |
109 | 118 | "\n", |
110 | | - "# get original model's predictions\n", |
111 | | - "x_train_predictions = np.array([np.argmax(arr) for arr in art_classifier.predict(x_train)]).reshape(-1,1)\n", |
112 | | - "\n", |
113 | 119 | "# train attack model\n", |
114 | | - "bb_attack.fit(x_test)" |
| 120 | + "bb_attack.fit(attack_x_train)" |
115 | 121 | ] |
116 | 122 | }, |
117 | 123 | { |
|
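For context on what `AttributeInferenceBlackBox.fit(attack_x_train)` in the hunk above is doing: conceptually, the black-box attack trains an auxiliary classifier to predict the attacked feature from the remaining features plus the target model's output. A minimal sketch of that idea, not ART's actual implementation (the helper name and the `RandomForestClassifier` choice are illustrative assumptions):

```python
# Conceptual sketch of black-box attribute inference -- NOT ART's
# internal implementation. Assumes `model.predict` returns class
# probabilities, as the ART classifier wrapper does.
import numpy as np
from sklearn.ensemble import RandomForestClassifier  # illustrative choice

def fit_attribute_attack_model(model, x_attack, attack_feature):
    # the known values of the attacked feature become the labels
    labels = x_attack[:, attack_feature]
    # the attack model sees the remaining features...
    x_rest = np.delete(x_attack, attack_feature, axis=1)
    # ...plus the target model's predictions on the full records
    preds = np.argmax(model.predict(x_attack), axis=1).reshape(-1, 1)
    attack_model = RandomForestClassifier()
    attack_model.fit(np.hstack([x_rest, preds]), labels)
    return attack_model
```

The `AttributeInferenceBaseline` attack that appears later in the diff is the same idea without the `preds` column, i.e. it never queries the target model.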
130 | 136 | "name": "stdout", |
131 | 137 | "output_type": "stream", |
132 | 138 | "text": [ |
133 | | - "0.6981860285604014\n" |
| 139 | + "0.5993824778077962\n" |
134 | 140 | ] |
135 | 141 | } |
136 | 142 | ], |
137 | 143 | "source": [ |
138 | 144 | "# get inferred values\n", |
139 | 145 | "values = [-0.70718864, 1.41404987]\n", |
140 | | - "inferred_train_bb = bb_attack.infer(x_train_for_attack, x_train_predictions, values=values)\n", |
| 146 | + "inferred_train_bb = bb_attack.infer(attack_x_test, attack_x_test_predictions, values=values)\n", |
141 | 147 | "# check accuracy\n", |
142 | | - "train_acc = np.sum(inferred_train_bb == np.around(x_train_feature, decimals=8).reshape(1,-1)) / len(inferred_train_bb)\n", |
| 148 | + "train_acc = np.sum(inferred_train_bb == np.around(attack_x_test_feature, decimals=8).reshape(1,-1)) / len(inferred_train_bb)\n", |
143 | 149 | "print(train_acc)" |
144 | 150 | ] |
145 | 151 | }, |
146 | 152 | { |
147 | 153 | "cell_type": "markdown", |
148 | 154 | "metadata": {}, |
149 | 155 | "source": [ |
150 | | - "This means that for 70% of the training set, the attacked feature is inferred correctly using this attack." |
| 156 | + "This means that for 60% of the training set, the attacked feature is inferred correctly using this attack." |
151 | 157 | ] |
152 | 158 | }, |
153 | 159 | { |
|
168 | 174 | "name": "stdout", |
169 | 175 | "output_type": "stream", |
170 | 176 | "text": [ |
171 | | - "0.6522578155152451\n" |
| 177 | + "0.6254341952913933\n" |
172 | 178 | ] |
173 | 179 | } |
174 | 180 | ], |
|
180 | 186 | "priors = [3465 / 5183, 1718 / 5183]\n", |
181 | 187 | "\n", |
182 | 188 | "# get inferred values\n", |
183 | | - "inferred_train_wb1 = wb_attack.infer(x_train_for_attack, x_train_predictions, values=values, priors=priors)\n", |
| 189 | + "inferred_train_wb1 = wb_attack.infer(attack_x_test, attack_x_test_predictions, values=values, priors=priors)\n", |
184 | 190 | "\n", |
185 | 191 | "# check accuracy\n", |
186 | | - "train_acc = np.sum(inferred_train_wb1 == np.around(x_train_feature, decimals=8).reshape(1,-1)) / len(inferred_train_wb1)\n", |
| 192 | + "train_acc = np.sum(inferred_train_wb1 == np.around(attack_x_test_feature, decimals=8).reshape(1,-1)) / len(inferred_train_wb1)\n", |
187 | 193 | "print(train_acc)" |
188 | 194 | ] |
189 | 195 | }, |
|
203 | 209 | "name": "stdout", |
204 | 210 | "output_type": "stream", |
205 | 211 | "text": [ |
206 | | - "0.713624083365496\n" |
| 212 | + "0.702045542261675\n" |
207 | 213 | ] |
208 | 214 | } |
209 | 215 | ], |
|
213 | 219 | "wb2_attack = AttributeInferenceWhiteBoxDecisionTree(art_classifier, attack_feature=attack_feature)\n", |
214 | 220 | "\n", |
215 | 221 | "# get inferred values\n", |
216 | | - "inferred_train_wb2 = wb2_attack.infer(x_train_for_attack, x_train_predictions, values=values, priors=priors)\n", |
| 222 | + "inferred_train_wb2 = wb2_attack.infer(attack_x_test, attack_x_test_predictions, values=values, priors=priors)\n", |
217 | 223 | "\n", |
218 | 224 | "# check accuracy\n", |
219 | | - "train_acc = np.sum(inferred_train_wb2 == np.around(x_train_feature, decimals=8).reshape(1,-1)) / len(inferred_train_wb2)\n", |
| 225 | + "train_acc = np.sum(inferred_train_wb2 == np.around(attack_x_test_feature, decimals=8).reshape(1,-1)) / len(inferred_train_wb2)\n", |
220 | 226 | "print(train_acc)" |
221 | 227 | ] |
222 | 228 | }, |
223 | 229 | { |
224 | 230 | "cell_type": "markdown", |
225 | 231 | "metadata": {}, |
226 | 232 | "source": [ |
227 | | - "The white-box attacks are able to correctly infer the attacked feature value in 65% and 71% of the training set respectively. \n", |
| 233 | + "The white-box attacks are able to correctly infer the attacked feature value in 62% and 70% of the training set respectively. \n", |
228 | 234 | "\n", |
229 | 235 | "Now let's check the precision and recall:" |
230 | 236 | ] |
|
238 | 244 | "name": "stdout", |
239 | 245 | "output_type": "stream", |
240 | 246 | "text": [ |
241 | | - "(0.654054054054054, 0.14421930870083433)\n", |
242 | | - "(0.3892857142857143, 0.1299165673420739)\n", |
243 | | - "(0.6644067796610169, 0.23361144219308702)\n" |
| 247 | + "(0.3498716852010265, 0.2371014492753623)\n", |
| 248 | + "(0.3430232558139535, 0.13681159420289854)\n", |
| 249 | + "(0.6425196850393701, 0.23652173913043478)\n" |
244 | 250 | ] |
245 | 251 | } |
246 | 252 | ], |
|
270 | 276 | " return precision, recall\n", |
271 | 277 | " \n", |
272 | 278 | "# black-box\n", |
273 | | - "print(calc_precision_recall(inferred_train_bb, np.around(x_train_feature, decimals=8), positive_value=1.41404987))\n", |
| 279 | + "print(calc_precision_recall(inferred_train_bb, np.around(attack_x_test_feature, decimals=8), positive_value=1.41404987))\n", |
274 | 280 | "# white-box 1\n", |
275 | | - "print(calc_precision_recall(inferred_train_wb1, np.around(x_train_feature, decimals=8), positive_value=1.41404987))\n", |
| 281 | + "print(calc_precision_recall(inferred_train_wb1, np.around(attack_x_test_feature, decimals=8), positive_value=1.41404987))\n", |
276 | 282 | "# white-box 2\n", |
277 | | - "print(calc_precision_recall(inferred_train_wb2, np.around(x_train_feature, decimals=8), positive_value=1.41404987))" |
| 283 | + "print(calc_precision_recall(inferred_train_wb2, np.around(attack_x_test_feature, decimals=8), positive_value=1.41404987))" |
278 | 284 | ] |
279 | 285 | }, |
280 | 286 | { |
|
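The body of `calc_precision_recall` is elided by the diff context above. An implementation consistent with its visible call sites and its `return precision, recall` line might look like the following sketch (the convention of returning 1.0 in the degenerate cases is an assumption):

```python
import numpy as np

def calc_precision_recall(predicted, actual, positive_value=1):
    # flatten so the element-wise comparison is shape-independent
    predicted = np.asarray(predicted).reshape(-1)
    actual = np.asarray(actual).reshape(-1)
    true_positives = np.sum((predicted == positive_value) & (actual == positive_value))
    predicted_positives = np.sum(predicted == positive_value)
    actual_positives = np.sum(actual == positive_value)
    # assumed convention: precision/recall default to 1.0 when undefined
    precision = true_positives / predicted_positives if predicted_positives else 1.0
    recall = true_positives / actual_positives if actual_positives else 1.0
    return precision, recall
```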
286 | 292 | }, |
287 | 293 | { |
288 | 294 | "cell_type": "code", |
289 | | - "execution_count": 9, |
| 295 | + "execution_count": 8, |
290 | 296 | "metadata": {}, |
291 | 297 | "outputs": [ |
292 | 298 | { |
293 | 299 | "name": "stdout", |
294 | 300 | "output_type": "stream", |
295 | 301 | "text": [ |
296 | | - "0.6761868004631416\n" |
| 302 | + "0.5247008876881513\n" |
297 | 303 | ] |
298 | 304 | } |
299 | 305 | ], |
|
303 | 309 | "baseline_attack = AttributeInferenceBaseline(attack_feature=attack_feature)\n", |
304 | 310 | "\n", |
305 | 311 | "# train attack model\n", |
306 | | - "baseline_attack.fit(x_test)\n", |
| 312 | + "baseline_attack.fit(attack_x_train)\n", |
307 | 313 | "# infer values\n", |
308 | | - "inferred_train_baseline = baseline_attack.infer(x_train_for_attack, values=values)\n", |
| 314 | + "inferred_train_baseline = baseline_attack.infer(attack_x_test, values=values)\n", |
309 | 315 | "# check accuracy\n", |
310 | | - "baseline_train_acc = np.sum(inferred_train_baseline == np.around(x_train_feature, decimals=8).reshape(1,-1)) / len(inferred_train_baseline)\n", |
| 316 | + "baseline_train_acc = np.sum(inferred_train_baseline == np.around(attack_x_test_feature, decimals=8).reshape(1,-1)) / len(inferred_train_baseline)\n", |
311 | 317 | "print(baseline_train_acc)" |
312 | 318 | ] |
313 | 319 | }, |
314 | 320 | { |
315 | 321 | "cell_type": "markdown", |
316 | 322 | "metadata": {}, |
317 | 323 | "source": [ |
318 | | - "We can see that both the black-box attack and the second white-box attack do slightly better than the baseline." |
| 324 | + "We can see that both the black-box and white-box attacks do better than the baseline." |
| 325 | + ] |
| 326 | + }, |
| 327 | + { |
| 328 | + "cell_type": "markdown", |
| 329 | + "metadata": {}, |
| 330 | + "source": [ |
| 331 | + "## Membership based attack\n", |
| 332 | + "In this attack the idea is to find the target feature value that maximizes the membership attack confidence, indicating that this is the most probable value for member samples. It can be based on any membership attack (either black-box or white-box) as long as it supports the given model.\n", |
| 333 | + "\n", |
| 334 | + "### Train membership attack" |
| 335 | + ] |
| 336 | + }, |
| 337 | + { |
| 338 | + "cell_type": "code", |
| 339 | + "execution_count": 9, |
| 340 | + "metadata": {}, |
| 341 | + "outputs": [], |
| 342 | + "source": [ |
| 343 | + "from art.attacks.inference.membership_inference import MembershipInferenceBlackBox\n", |
| 344 | + "\n", |
| 345 | + "mem_attack = MembershipInferenceBlackBox(art_classifier)\n", |
| 346 | + "\n", |
| 347 | + "mem_attack.fit(x_train[:attack_train_size], y_train[:attack_train_size], x_test, y_test)" |
| 348 | + ] |
| 349 | + }, |
| 350 | + { |
| 351 | + "cell_type": "markdown", |
| 352 | + "metadata": {}, |
| 353 | + "source": [ |
| 354 | + "### Apply attribute attack" |
| 355 | + ] |
| 356 | + }, |
| 357 | + { |
| 358 | + "cell_type": "code", |
| 359 | + "execution_count": 10, |
| 360 | + "metadata": {}, |
| 361 | + "outputs": [ |
| 362 | + { |
| 363 | + "name": "stdout", |
| 364 | + "output_type": "stream", |
| 365 | + "text": [ |
| 366 | + "0.6358548822848321\n" |
| 367 | + ] |
| 368 | + } |
| 369 | + ], |
| 370 | + "source": [ |
| 371 | + "from art.attacks.inference.attribute_inference import AttributeInferenceMembership\n", |
| 372 | + "\n", |
| 373 | + "attack = AttributeInferenceMembership(art_classifier, mem_attack, attack_feature=attack_feature)\n", |
| 374 | + "\n", |
| 375 | + "# infer values\n", |
| 376 | + "inferred_train = attack.infer(attack_x_test, attack_y_test, values=values)\n", |
| 377 | + "\n", |
| 378 | + "# check accuracy\n", |
| 379 | + "train_acc = np.sum(inferred_train == np.around(attack_x_test_feature, decimals=8).reshape(1,-1)) / len(inferred_train)\n", |
| 380 | + "print(train_acc)" |
| 381 | + ] |
| 382 | + }, |
| 383 | + { |
| 384 | + "cell_type": "markdown", |
| 385 | + "metadata": {}, |
| 386 | + "source": [ |
| 387 | + "We can see that this attack does slightly better than the regular black-box attack, even though it still assumes only black-box access to the model (employs a black-box membership attack). But it is not as good as the white-box attacks." |
319 | 388 | ] |
320 | 389 | } |
321 | 390 | ], |
|
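To make the mechanism behind `AttributeInferenceMembership` in the new cells above concrete: for each candidate feature value, the attack rebuilds the sample with that value filled in, scores it with the membership attack, and keeps the value with the highest per-sample membership confidence. A conceptual sketch, not ART's code; the helper below is hypothetical, and it assumes `mem_attack.infer` accepts a `probabilities=True` flag (available in recent ART versions):

```python
import numpy as np

def infer_feature_via_membership(mem_attack, x_without_feature, y, attack_feature, values):
    # score each candidate value by membership confidence
    confidences = []
    for value in values:
        # rebuild full records with the candidate value inserted
        candidate = np.insert(x_without_feature, attack_feature, value, axis=1)
        # per-sample probability of being a training-set member
        # (assumes the `probabilities` kwarg exists in this ART version)
        confidences.append(np.ravel(mem_attack.infer(candidate, y, probabilities=True)))
    # per sample, pick the value with the highest membership confidence
    best = np.argmax(np.stack(confidences), axis=0)
    return np.asarray(values)[best]
```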