Skip to content

Commit bd410fb

Browse files
authored
Merge pull request #720 from Trusted-AI/development_issue_595
Implement Label-Only Boundary Distance Attack and Gap Attack for Membership Inference
2 parents b46ad0d + 43986c1 commit bd410fb

File tree

15 files changed

+533
-90
lines changed

15 files changed

+533
-90
lines changed

art/attacks/inference/membership_inference/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,5 @@
33
"""
44
from art.attacks.inference.membership_inference.black_box import MembershipInferenceBlackBox
55
from art.attacks.inference.membership_inference.black_box_rule_based import MembershipInferenceBlackBoxRuleBased
6+
from art.attacks.inference.membership_inference.label_only_gap_attack import LabelOnlyGapAttack
7+
from art.attacks.inference.membership_inference.label_only_boundary_distance import LabelOnlyDecisionBoundary

art/attacks/inference/membership_inference/black_box_rule_based.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class MembershipInferenceBlackBoxRuleBased(InferenceAttack):
4646
member. Otherwise, it is not a member.
4747
"""
4848

49+
attack_params = InferenceAttack.attack_params
4950
_estimator_requirements = (BaseEstimator, ClassifierMixin)
5051

5152
def __init__(self, classifier: "CLASSIFIER_TYPE"):
@@ -71,10 +72,9 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
7172
raise ValueError("Shape of x does not match input_shape of classifier")
7273

7374
y = check_and_transform_label_format(y, len(np.unique(y)), return_one_hot=True)
74-
y = np.array([np.argmax(arr) for arr in y]).reshape(-1, 1)
7575
if y.shape[0] != x.shape[0]:
7676
raise ValueError("Number of rows in x and y do not match")
7777

7878
# get model's predictions for x
79-
predictions = np.array([np.argmax(arr) for arr in self.estimator.predict(x)]).reshape(-1, 1)
80-
return np.asarray([1 if p == y[index] else 0 for index, p in enumerate(predictions)])
79+
y_pred = self.estimator.predict(x=x)
80+
return (np.argmax(y, axis=1) == np.argmax(y_pred, axis=1)).astype(np.int)
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# MIT License
2+
#
3+
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2020
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6+
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
7+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8+
# persons to whom the Software is furnished to do so, subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
11+
# Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
16+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17+
# SOFTWARE.
18+
"""
19+
This module implements the Label-Only Inference Attack based on Decision Boundary.
20+
21+
| Paper link: https://arxiv.org/abs/2007.14321
22+
"""
23+
import logging
24+
from typing import Optional, NoReturn, TYPE_CHECKING
25+
26+
import numpy as np
27+
28+
from art.attacks.attack import InferenceAttack
29+
from art.estimators.estimator import BaseEstimator
30+
from art.estimators.classification.classifier import ClassifierMixin
31+
32+
if TYPE_CHECKING:
33+
from art.utils import CLASSIFIER_TYPE
34+
35+
logger = logging.getLogger(__name__)
36+
37+
38+
class LabelOnlyDecisionBoundary(InferenceAttack):
    """
    Implementation of Label-Only Inference Attack based on Decision Boundary.

    For every sample the attack measures the L2 distance between the sample and an adversarial example generated with
    HopSkipJump. Samples that the estimator misclassifies are assigned distance zero. A sample is reported as a member
    of the training data if its distance is strictly larger than `distance_threshold_tau`.

    | Paper link: https://arxiv.org/abs/2007.14321
    """

    attack_params = InferenceAttack.attack_params + [
        "distance_threshold_tau",
    ]
    _estimator_requirements = (BaseEstimator, ClassifierMixin)

    def __init__(self, estimator: "CLASSIFIER_TYPE", distance_threshold_tau: Optional[float] = None):
        """
        Create a `LabelOnlyDecisionBoundary` instance for Label-Only Inference Attack based on Decision Boundary.

        :param estimator: A trained classification estimator.
        :param distance_threshold_tau: Threshold distance for decision boundary. Samples with boundary distances larger
                                       than threshold are considered members of the training dataset. May be left as
                                       `None` and determined later with `calibrate_distance_threshold`.
        :raises ValueError: If `distance_threshold_tau` is provided but is not a positive number.
        """
        super().__init__(estimator=estimator)
        self.distance_threshold_tau = distance_threshold_tau
        self._check_params()

    @staticmethod
    def _boundary_distance(x: np.ndarray, x_adv: np.ndarray, y: np.ndarray, y_pred: np.ndarray) -> np.ndarray:
        """
        Compute the per-sample L2 distance between `x` and `x_adv`, set to zero where the prediction is wrong.

        :param x: Original samples, first axis is the sample axis.
        :param x_adv: Adversarial counterparts of `x` with the same shape.
        :param y: True labels; class indices are taken with `argmax` over axis 1.
        :param y_pred: Predictions for `x`; class indices are taken with `argmax` over axis 1.
        :return: Array with one distance per sample.
        """
        # Flatten every sample so that the norm is taken over all of its features at once.
        distance = np.linalg.norm((x_adv - x).reshape((x.shape[0], -1)), ord=2, axis=1)
        # Misclassified samples are treated as lying on the boundary.
        distance[np.argmax(y_pred, axis=1) != np.argmax(y, axis=1)] = 0
        return distance

    def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
        """
        Infer membership of input `x` in estimator's training data.

        :param x: Input data.
        :param y: True labels for `x`. Required; class indices are taken with `argmax` over axis 1.
        :param kwargs: Parameters for HopSkipJump attack except argument `estimator`.
        :return: An array holding the inferred membership status, 1 indicates a member and 0 indicates non-member.
        :raises ValueError: If `y` is `None` or if no distance threshold has been set or calibrated.
        """
        # Fail fast with a clear message instead of after the expensive adversarial example generation.
        if y is None:
            raise ValueError("Argument `y` is None, but this attack requires true labels `y` to be provided.")

        if self.distance_threshold_tau is None:
            raise ValueError(
                "No value for distance threshold `distance_threshold_tau` provided. Please set "
                "`distance_threshold_tau` or run method `calibrate_distance_threshold` on known training and test "
                "dataset."
            )

        from art.attacks.evasion.hop_skip_jump import HopSkipJump

        hsj = HopSkipJump(classifier=self.estimator, **kwargs)
        x_adv = hsj.generate(x=x, y=y)

        distance = self._boundary_distance(x, x_adv, y, self.estimator.predict(x=x))

        return np.where(distance > self.distance_threshold_tau, 1, 0)

    def calibrate_distance_threshold(
        self,
        classifier_train: "CLASSIFIER_TYPE",
        x_train: np.ndarray,
        y_train: np.ndarray,
        x_test: np.ndarray,
        y_test: np.ndarray,
        **kwargs
    ) -> None:
        """
        Calibrate distance threshold maximising the membership inference accuracy on `x_train` and `x_test`.

        The candidate thresholds are 99 equally spaced values between zero and the largest observed distance (both
        excluded). The smallest candidate reaching the best accuracy is stored in `distance_threshold_tau`.

        :param classifier_train: A trained classifier used to generate the adversarial examples.
        :param x_train: Training data.
        :param y_train: Labels of training data `x_train`.
        :param x_test: Test data.
        :param y_test: Labels of test data `x_test`.
        :param kwargs: Parameters for HopSkipJump attack except argument `classifier`.
        """
        from art.attacks.evasion.hop_skip_jump import HopSkipJump

        hsj = HopSkipJump(classifier=classifier_train, **kwargs)

        x_train_adv = hsj.generate(x=x_train, y=y_train)
        x_test_adv = hsj.generate(x=x_test, y=y_test)

        # NOTE(review): adversarial examples come from `classifier_train` while the correctness of predictions is
        # judged with `self.estimator`; the two agree only if both refer to the same model - confirm intent.
        distance_train = self._boundary_distance(x_train, x_train_adv, y_train, self.estimator.predict(x=x_train))
        distance_test = self._boundary_distance(x_test, x_test_adv, y_test, self.estimator.predict(x=x_test))

        num_increments = 100
        tau_increment = np.amax([np.amax(distance_train), np.amax(distance_test)]) / num_increments

        num_samples = distance_train.shape[0] + distance_test.shape[0]
        acc_max = 0.0
        distance_threshold_tau = 0.0

        for i_tau in range(1, num_increments):
            tau = i_tau * tau_increment

            # Correct decisions: training samples flagged as members plus test samples flagged as non-members.
            num_correct_train = np.sum(distance_train > tau)
            num_correct_test = distance_test.shape[0] - np.sum(distance_test > tau)
            acc = (num_correct_train + num_correct_test) / num_samples

            # Strict comparison keeps the smallest threshold among ties.
            if acc > acc_max:
                distance_threshold_tau = tau
                acc_max = acc

        # Store a plain Python float so that the value also passes `_check_params` if parameters are re-validated.
        self.distance_threshold_tau = float(distance_threshold_tau)

    def _check_params(self) -> None:
        """
        Validate `distance_threshold_tau`.

        `None` is accepted because the threshold can be determined later with `calibrate_distance_threshold`.

        :raises ValueError: If the threshold is set but is not a positive `int` or `float`.
        """
        if self.distance_threshold_tau is None:
            return

        if not isinstance(self.distance_threshold_tau, (int, float)) or self.distance_threshold_tau <= 0.0:
            raise ValueError("The distance threshold `distance_threshold_tau` needs to be a positive float.")
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# MIT License
2+
#
3+
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2020
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6+
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
7+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8+
# persons to whom the Software is furnished to do so, subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
11+
# Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
16+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17+
# SOFTWARE.
18+
"""
19+
This module implements the Label-Only Gap Attack.
20+
21+
| Paper link: https://arxiv.org/abs/2007.14321
22+
"""
23+
import logging
24+
25+
from art.attacks.inference.membership_inference import MembershipInferenceBlackBoxRuleBased
26+
27+
28+
logger = logging.getLogger(__name__)
29+
30+
31+
# Expose the rule-based black-box attack under a second name; both names are bound to the same class object.
LabelOnlyGapAttack = MembershipInferenceBlackBoxRuleBased

tests/attacks/inference/attribute_inference/__init__.py

Whitespace-only changes.

tests/attacks/inference/test_attribute_inference.py renamed to tests/attacks/inference/attribute_inference/test_black_box.py

Lines changed: 0 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,9 @@
2525
import torch.optim as optim
2626

2727
from art.attacks.inference.attribute_inference.black_box import AttributeInferenceBlackBox
28-
from art.attacks.inference.attribute_inference.white_box_decision_tree import AttributeInferenceWhiteBoxDecisionTree
29-
from art.attacks.inference.attribute_inference.white_box_lifestyle_decision_tree import (
30-
AttributeInferenceWhiteBoxLifestyleDecisionTree,
31-
)
3228
from art.estimators.classification.pytorch import PyTorchClassifier
3329
from art.estimators.estimator import BaseEstimator
3430
from art.estimators.classification import ClassifierMixin
35-
from art.estimators.classification.scikitlearn import ScikitlearnDecisionTreeClassifier
3631

3732
from tests.attacks.utils import backend_test_classifier_type_check_fail
3833
from tests.utils import ARTTestException
@@ -148,67 +143,5 @@ def transform_feature(x):
148143
art_warning(e)
149144

150145

151-
@pytest.mark.skipMlFramework("dl_frameworks")
152-
def test_white_box(art_warning, decision_tree_estimator, get_iris_dataset):
153-
try:
154-
attack_feature = 2 # petal length
155-
values = [0.14, 0.42, 0.71] # rounded down
156-
priors = [50 / 150, 54 / 150, 46 / 150]
157-
158-
(x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset
159-
x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
160-
x_train_feature = x_train_iris[:, attack_feature]
161-
x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
162-
x_test_feature = x_test_iris[:, attack_feature]
163-
164-
classifier = decision_tree_estimator()
165-
166-
attack = AttributeInferenceWhiteBoxDecisionTree(classifier, attack_feature=attack_feature)
167-
x_train_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_train_iris)]).reshape(-1, 1)
168-
x_test_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_test_iris)]).reshape(-1, 1)
169-
inferred_train = attack.infer(x_train_for_attack, x_train_predictions, values=values, priors=priors)
170-
inferred_test = attack.infer(x_test_for_attack, x_test_predictions, values=values, priors=priors)
171-
train_diff = np.abs(inferred_train - x_train_feature.reshape(1, -1))
172-
test_diff = np.abs(inferred_test - x_test_feature.reshape(1, -1))
173-
assert np.sum(train_diff) / len(inferred_train) == pytest.approx(0.2108, abs=0.03)
174-
assert np.sum(test_diff) / len(inferred_test) == pytest.approx(0.1988, abs=0.03)
175-
except ARTTestException as e:
176-
art_warning(e)
177-
178-
179-
@pytest.mark.skipMlFramework("dl_frameworks")
180-
def test_white_box_lifestyle(art_warning, decision_tree_estimator, get_iris_dataset):
181-
try:
182-
attack_feature = 2 # petal length
183-
values = [0.14, 0.42, 0.71] # rounded down
184-
priors = [50 / 150, 54 / 150, 46 / 150]
185-
186-
(x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset
187-
x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
188-
x_train_feature = x_train_iris[:, attack_feature]
189-
x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
190-
x_test_feature = x_test_iris[:, attack_feature]
191-
192-
classifier = decision_tree_estimator()
193-
attack = AttributeInferenceWhiteBoxLifestyleDecisionTree(classifier, attack_feature=attack_feature)
194-
x_train_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_train_iris)]).reshape(-1, 1)
195-
x_test_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_test_iris)]).reshape(-1, 1)
196-
inferred_train = attack.infer(x_train_for_attack, x_train_predictions, values=values, priors=priors)
197-
inferred_test = attack.infer(x_test_for_attack, x_test_predictions, values=values, priors=priors)
198-
train_diff = np.abs(inferred_train - x_train_feature.reshape(1, -1))
199-
test_diff = np.abs(inferred_test - x_test_feature.reshape(1, -1))
200-
assert np.sum(train_diff) / len(inferred_train) == pytest.approx(0.3357, abs=0.03)
201-
assert np.sum(test_diff) / len(inferred_test) == pytest.approx(0.3149, abs=0.03)
202-
# assert np.sum(train_diff) / len(inferred_train) < np.sum(test_diff) / len(inferred_test)
203-
except ARTTestException as e:
204-
art_warning(e)
205-
206-
207146
def test_classifier_type_check_fail():
208147
backend_test_classifier_type_check_fail(AttributeInferenceBlackBox, (BaseEstimator, ClassifierMixin))
209-
backend_test_classifier_type_check_fail(
210-
AttributeInferenceWhiteBoxLifestyleDecisionTree, (ScikitlearnDecisionTreeClassifier,)
211-
)
212-
backend_test_classifier_type_check_fail(
213-
AttributeInferenceWhiteBoxDecisionTree, (ScikitlearnDecisionTreeClassifier,)
214-
)
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# MIT License
2+
#
3+
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2020
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6+
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
7+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8+
# persons to whom the Software is furnished to do so, subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
11+
# Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
16+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17+
# SOFTWARE.
18+
from __future__ import absolute_import, division, print_function, unicode_literals
19+
20+
import logging
21+
import pytest
22+
23+
import numpy as np
24+
25+
from art.attacks.inference.attribute_inference.white_box_decision_tree import AttributeInferenceWhiteBoxDecisionTree
26+
from art.estimators.classification.scikitlearn import ScikitlearnDecisionTreeClassifier
27+
28+
from tests.attacks.utils import backend_test_classifier_type_check_fail
29+
from tests.utils import ARTTestException
30+
31+
logger = logging.getLogger(__name__)
32+
33+
34+
@pytest.mark.skipMlFramework("dl_frameworks")
def test_white_box(art_warning, decision_tree_estimator, get_iris_dataset):
    """
    Run the white-box decision tree attribute inference attack on the iris data with feature 2 removed and check that
    the mean absolute error of the inferred feature is close to 0.2108 (train) and 0.1988 (test), within 0.03.
    """

    def _predicted_labels(model, samples):
        # Column vector holding the index of the highest-scoring class for every sample.
        return np.array([np.argmax(row) for row in model.predict(samples)]).reshape(-1, 1)

    def _mean_abs_error(inferred, expected):
        # Absolute differences summed and divided by the number of inferred values.
        return np.sum(np.abs(inferred - expected.reshape(1, -1))) / len(inferred)

    try:
        attack_feature = 2  # petal length
        values = [0.14, 0.42, 0.71]  # rounded down
        priors = [50 / 150, 54 / 150, 46 / 150]

        (x_train_iris, _), (x_test_iris, _) = get_iris_dataset

        # Split every dataset into the attacked column and the remaining features.
        train_without_feature = np.delete(x_train_iris, attack_feature, 1)
        train_feature = x_train_iris[:, attack_feature]
        test_without_feature = np.delete(x_test_iris, attack_feature, 1)
        test_feature = x_test_iris[:, attack_feature]

        classifier = decision_tree_estimator()
        attack = AttributeInferenceWhiteBoxDecisionTree(classifier, attack_feature=attack_feature)

        train_labels = _predicted_labels(classifier, x_train_iris)
        test_labels = _predicted_labels(classifier, x_test_iris)

        inferred_train = attack.infer(train_without_feature, train_labels, values=values, priors=priors)
        inferred_test = attack.infer(test_without_feature, test_labels, values=values, priors=priors)

        assert _mean_abs_error(inferred_train, train_feature) == pytest.approx(0.2108, abs=0.03)
        assert _mean_abs_error(inferred_test, test_feature) == pytest.approx(0.1988, abs=0.03)
    except ARTTestException as e:
        art_warning(e)
60+
61+
62+
def test_classifier_type_check_fail():
    """
    Call the shared `backend_test_classifier_type_check_fail` helper with the white-box decision tree attack and the
    tuple of estimator classes passed alongside it (presumably the accepted estimator types - confirm against helper).
    """
    estimator_classes = (ScikitlearnDecisionTreeClassifier,)
    backend_test_classifier_type_check_fail(AttributeInferenceWhiteBoxDecisionTree, estimator_classes)

0 commit comments

Comments
 (0)