Skip to content

Commit eddade6

Browse files
authored
Merge pull request #1588 from abigailgold/dev_1.10.0_att_fix
Support slices of size 1 in attribute attacks
2 parents bdab55a + 70dfa2a commit eddade6

File tree

9 files changed

+281
-33
lines changed

9 files changed

+281
-33
lines changed

art/attacks/inference/attribute_inference/baseline.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,13 @@
2929

3030
from art.estimators.classification.classifier import ClassifierMixin
3131
from art.attacks.attack import AttributeInferenceAttack
32-
from art.utils import check_and_transform_label_format, float_to_categorical, floats_to_one_hot, get_feature_values
32+
from art.utils import (
33+
check_and_transform_label_format,
34+
float_to_categorical,
35+
floats_to_one_hot,
36+
get_feature_values,
37+
get_feature_index,
38+
)
3339

3440
if TYPE_CHECKING:
3541
from art.utils import CLASSIFIER_TYPE
@@ -65,11 +71,6 @@ def __init__(
6571
"""
6672
super().__init__(estimator=None, attack_feature=attack_feature)
6773

68-
if isinstance(self.attack_feature, int):
69-
self.single_index_feature = True
70-
else:
71-
self.single_index_feature = False
72-
7374
self._values: Optional[list] = None
7475

7576
if attack_model:
@@ -108,6 +109,7 @@ def __init__(
108109
raise ValueError("Illegal value for parameter `attack_model_type`.")
109110

110111
self._check_params()
112+
self.attack_feature = get_feature_index(self.attack_feature)
111113

112114
def fit(self, x: np.ndarray) -> None:
113115
"""
@@ -117,13 +119,13 @@ def fit(self, x: np.ndarray) -> None:
117119
"""
118120

119121
# Checks:
120-
if self.single_index_feature and isinstance(self.attack_feature, int) and self.attack_feature >= x.shape[1]:
122+
if isinstance(self.attack_feature, int) and self.attack_feature >= x.shape[1]:
121123
raise ValueError("attack_feature must be a valid index to a feature in x")
122124

123125
# get vector of attacked feature
124126
y = x[:, self.attack_feature]
125-
self._values = get_feature_values(y, self.single_index_feature)
126-
if self.single_index_feature:
127+
self._values = get_feature_values(y, isinstance(self.attack_feature, int))
128+
if isinstance(self.attack_feature, int):
127129
y_one_hot = float_to_categorical(y)
128130
else:
129131
y_one_hot = floats_to_one_hot(y)
@@ -161,7 +163,7 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
161163
predictions = self.attack_model.predict(x_test).astype(np.float32)
162164

163165
if self._values is not None:
164-
if self.single_index_feature:
166+
if isinstance(self.attack_feature, int):
165167
predictions = np.array([self._values[np.argmax(arr)] for arr in predictions])
166168
else:
167169
i = 0

art/attacks/inference/attribute_inference/black_box.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,13 @@
3232
from art.estimators.classification.classifier import ClassifierMixin
3333
from art.attacks.attack import AttributeInferenceAttack
3434
from art.estimators.regression import RegressorMixin
35-
from art.utils import check_and_transform_label_format, float_to_categorical, floats_to_one_hot, get_feature_values
35+
from art.utils import (
36+
check_and_transform_label_format,
37+
float_to_categorical,
38+
floats_to_one_hot,
39+
get_feature_values,
40+
get_feature_index,
41+
)
3642

3743
if TYPE_CHECKING:
3844
from art.utils import CLASSIFIER_TYPE, REGRESSOR_TYPE
@@ -83,10 +89,6 @@ def __init__(
8389
`estimator` is a regressor and if `scale_range` is not supplied.
8490
"""
8591
super().__init__(estimator=estimator, attack_feature=attack_feature)
86-
if isinstance(self.attack_feature, int):
87-
self.single_index_feature = True
88-
else:
89-
self.single_index_feature = False
9092

9193
self._values: Optional[list] = None
9294
self._attack_model_type = attack_model_type
@@ -131,6 +133,7 @@ def __init__(
131133
self.scale_range = scale_range
132134

133135
self._check_params()
136+
self.attack_feature = get_feature_index(self.attack_feature)
134137

135138
def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
136139
"""
@@ -144,7 +147,7 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
144147
if self.estimator.input_shape is not None:
145148
if self.estimator.input_shape[0] != x.shape[1]:
146149
raise ValueError("Shape of x does not match input_shape of model")
147-
if self.single_index_feature and isinstance(self.attack_feature, int) and self.attack_feature >= x.shape[1]:
150+
if isinstance(self.attack_feature, int) and self.attack_feature >= x.shape[1]:
148151
raise ValueError("`attack_feature` must be a valid index to a feature in x")
149152

150153
# get model's predictions for x
@@ -162,8 +165,8 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
162165

163166
# get vector of attacked feature
164167
y_attack = x[:, self.attack_feature]
165-
self._values = get_feature_values(y_attack, self.single_index_feature)
166-
if self.single_index_feature:
168+
self._values = get_feature_values(y_attack, isinstance(self.attack_feature, int))
169+
if isinstance(self.attack_feature, int):
167170
y_one_hot = float_to_categorical(y_attack)
168171
else:
169172
y_one_hot = floats_to_one_hot(y_attack)
@@ -210,7 +213,7 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
210213
if pred.shape[0] != x.shape[0]:
211214
raise ValueError("Number of rows in x and y do not match")
212215
if self.estimator.input_shape is not None:
213-
if self.single_index_feature and self.estimator.input_shape[0] != x.shape[1] + 1:
216+
if isinstance(self.attack_feature, int) and self.estimator.input_shape[0] != x.shape[1] + 1:
214217
raise ValueError("Number of features in x + 1 does not match input_shape of model")
215218

216219
if RegressorMixin in type(self.estimator).__mro__:
@@ -234,7 +237,7 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
234237
predictions = self.attack_model.predict(x_test).astype(np.float32)
235238

236239
if self._values is not None:
237-
if self.single_index_feature:
240+
if isinstance(self.attack_feature, int):
238241
predictions = np.array([self._values[np.argmax(arr)] for arr in predictions])
239242
else:
240243
i = 0

art/attacks/inference/attribute_inference/meminf_based.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from art.attacks.attack import AttributeInferenceAttack, MembershipInferenceAttack
3131
from art.estimators.regression import RegressorMixin
3232
from art.exceptions import EstimatorError
33+
from art.utils import get_feature_index
3334

3435
if TYPE_CHECKING:
3536
from art.utils import CLASSIFIER_TYPE, REGRESSOR_TYPE
@@ -68,6 +69,7 @@ def __init__(
6869

6970
self.membership_attack = membership_attack
7071
self._check_params()
72+
self.attack_feature = get_feature_index(self.attack_feature)
7173

7274
def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.ndarray:
7375
"""
@@ -104,7 +106,6 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
104106
v_full = np.full((x.shape[0], 1), value).astype(x.dtype)
105107
x_value = np.concatenate((x[:, : self.attack_feature], v_full), axis=1)
106108
x_value = np.concatenate((x_value, x[:, self.attack_feature :]), axis=1)
107-
108109
predicted = self.membership_attack.infer(x_value, y, probabilities=True)
109110
if first:
110111
probabilities = predicted

art/attacks/inference/attribute_inference/true_label_baseline.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,13 @@
3030

3131
from art.estimators.classification.classifier import ClassifierMixin
3232
from art.attacks.attack import AttributeInferenceAttack
33-
from art.utils import check_and_transform_label_format, float_to_categorical, floats_to_one_hot, get_feature_values
33+
from art.utils import (
34+
check_and_transform_label_format,
35+
float_to_categorical,
36+
floats_to_one_hot,
37+
get_feature_values,
38+
get_feature_index,
39+
)
3440

3541
if TYPE_CHECKING:
3642
from art.utils import CLASSIFIER_TYPE
@@ -74,11 +80,6 @@ def __init__(
7480
"""
7581
super().__init__(estimator=None, attack_feature=attack_feature)
7682

77-
if isinstance(self.attack_feature, int):
78-
self.single_index_feature = True
79-
else:
80-
self.single_index_feature = False
81-
8283
self._values: Optional[list] = None
8384

8485
if attack_model:
@@ -119,6 +120,7 @@ def __init__(
119120
self.prediction_normal_factor = prediction_normal_factor
120121
self.scale_range = scale_range
121122
self._check_params()
123+
self.attack_feature = get_feature_index(self.attack_feature)
122124

123125
def fit(self, x: np.ndarray, y: np.ndarray) -> None:
124126
"""
@@ -129,13 +131,13 @@ def fit(self, x: np.ndarray, y: np.ndarray) -> None:
129131
"""
130132

131133
# Checks:
132-
if self.single_index_feature and isinstance(self.attack_feature, int) and self.attack_feature >= x.shape[1]:
134+
if isinstance(self.attack_feature, int) and self.attack_feature >= x.shape[1]:
133135
raise ValueError("attack_feature must be a valid index to a feature in x")
134136

135137
# get vector of attacked feature
136138
attacked_feature = x[:, self.attack_feature]
137-
self._values = get_feature_values(attacked_feature, self.single_index_feature)
138-
if self.single_index_feature:
139+
self._values = get_feature_values(attacked_feature, isinstance(self.attack_feature, int))
140+
if isinstance(self.attack_feature, int):
139141
y_one_hot = float_to_categorical(attacked_feature)
140142
else:
141143
y_one_hot = floats_to_one_hot(attacked_feature)
@@ -187,7 +189,7 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
187189
predictions = self.attack_model.predict(x_test).astype(np.float32)
188190

189191
if self._values is not None:
190-
if self.single_index_feature:
192+
if isinstance(self.attack_feature, int):
191193
predictions = np.array([self._values[np.argmax(arr)] for arr in predictions])
192194
else:
193195
i = 0

art/utils.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,30 @@ def get_feature_values(x: np.ndarray, single_index_feature: bool) -> list:
675675
return values
676676

677677

678+
def get_feature_index(feature: Union[int, slice]) -> Union[int, slice]:
    """
    Returns a modified feature index: in case of a slice that selects exactly one column, returns the corresponding
    integer. Otherwise, returns the same value (integer or slice) as passed.

    The number of selected columns is counted exactly (taking `step` into account), so e.g. `slice(0, 3, 2)` (columns
    0 and 2) is left as a slice, while `slice(0, 1, 2)` (column 0 only) is converted to `0`. Slices whose size cannot
    be determined without knowing the number of columns (open-ended `stop`, bounds of mixed sign, or a negative
    `step` without an explicit `start`) are returned unchanged.

    :param feature: The index or slice representing a feature to attack
    :return: An integer representing a single column index or a slice representing a multi-column index
    :raises ValueError: If the slice step is zero.
    """
    if isinstance(feature, int):
        return feature

    step = 1 if feature.step is None else feature.step
    if step == 0:
        raise ValueError("slice step cannot be zero")

    stop = feature.stop
    if stop is None:
        # Open-ended slice: its size depends on the number of columns in the data.
        return feature

    if feature.start is None:
        if step < 0:
            # With a negative step the implicit start is the last column, which is unknown here.
            return feature
        start = 0
    else:
        start = feature.start

    if (start < 0) != (stop < 0):
        # Bounds are relative to different ends of the array, so the size depends on the number of columns.
        return feature

    # `range` applies the same start/stop/step semantics as slicing, so its length is the exact column count.
    if len(range(start, stop, step)) == 1:
        return int(start)

    return feature
700+
701+
678702
def compute_success_array(
679703
classifier: "CLASSIFIER_TYPE",
680704
x_clean: np.ndarray,

tests/attacks/inference/attribute_inference/test_baseline.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,60 @@ def transform_feature(x):
8181
art_warning(e)
8282

8383

84+
@pytest.mark.skip_framework("dl_frameworks")
@pytest.mark.parametrize("model_type", ["nn", "rf"])
def test_black_box_baseline_slice(art_warning, get_iris_dataset, model_type):
    """
    Check that the baseline attribute inference attack accepts the attacked feature as a slice of size 1 and still
    reaches the expected accuracy on the iris data.
    """
    try:
        attack_feature = 2  # petal length
        values = [0.0, 1.0, 2.0]

        def transform_feature(x):
            # Bucket the attacked feature in place into three categories.
            high = x > 0.5
            mid = (x > 0.2) & (x <= 0.5)
            low = x <= 0.2
            x[high] = 2.0
            x[mid] = 1.0
            x[low] = 0.0

        (x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset

        # Train split: data without the attacked column, and the categorised attacked column on its own.
        x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
        x_train_feature = x_train_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_train_feature)
        # Re-insert the categorised column at its original position.
        x_train = np.concatenate(
            (
                x_train_for_attack[:, :attack_feature],
                x_train_feature,
                x_train_for_attack[:, attack_feature:],
            ),
            axis=1,
        )

        # Test split: same preparation, only the reduced data and the ground-truth column are needed.
        x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
        x_test_feature = x_test_iris[:, attack_feature].copy().reshape(-1, 1)
        transform_feature(x_test_feature)

        single_column = slice(attack_feature, attack_feature + 1)
        baseline_attack = AttributeInferenceBaseline(attack_feature=single_column, attack_model_type=model_type)

        # Fit the attack model, then infer the attacked feature for both splits.
        baseline_attack.fit(x_train)
        baseline_inferred_train = baseline_attack.infer(x_train_for_attack, values=values)
        baseline_inferred_test = baseline_attack.infer(x_test_for_attack, values=values)

        # Fraction of correctly inferred values per split.
        train_hits = np.sum(baseline_inferred_train == x_train_feature.reshape(1, -1))
        test_hits = np.sum(baseline_inferred_test == x_test_feature.reshape(1, -1))
        baseline_train_acc = train_hits / len(baseline_inferred_train)
        baseline_test_acc = test_hits / len(baseline_inferred_test)

        assert 0.8 <= baseline_train_acc
        assert 0.7 <= baseline_test_acc

    except ARTTestException as e:
        art_warning(e)
137+
84138
@pytest.mark.skip_framework("dl_frameworks")
85139
@pytest.mark.parametrize("model_type", ["nn", "rf"])
86140
def test_black_box_baseline_no_values(art_warning, get_iris_dataset, model_type):

tests/attacks/inference/attribute_inference/test_black_box.py

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,59 @@ def transform_feature(x):
8383
# check accuracy
8484
train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
8585
test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
86+
assert pytest.approx(0.8285, abs=0.2) == train_acc
87+
assert pytest.approx(0.8888, abs=0.18) == test_acc
88+
89+
except ARTTestException as e:
90+
art_warning(e)
91+
92+
93+
@pytest.mark.skip_framework("dl_frameworks")
94+
@pytest.mark.parametrize("model_type", ["nn", "rf"])
95+
def test_black_box_slice(art_warning, decision_tree_estimator, get_iris_dataset, model_type):
96+
try:
97+
attack_feature = 2 # petal length
98+
99+
# need to transform attacked feature into categorical
100+
def transform_feature(x):
101+
x[x > 0.5] = 2.0
102+
x[(x > 0.2) & (x <= 0.5)] = 1.0
103+
x[x <= 0.2] = 0.0
104+
105+
values = [0.0, 1.0, 2.0]
106+
107+
(x_train_iris, y_train_iris), (x_test_iris, y_test_iris) = get_iris_dataset
108+
# training data without attacked feature
109+
x_train_for_attack = np.delete(x_train_iris, attack_feature, 1)
110+
# only attacked feature
111+
x_train_feature = x_train_iris[:, attack_feature].copy().reshape(-1, 1)
112+
transform_feature(x_train_feature)
113+
# training data with attacked feature (after transformation)
114+
x_train = np.concatenate((x_train_for_attack[:, :attack_feature], x_train_feature), axis=1)
115+
x_train = np.concatenate((x_train, x_train_for_attack[:, attack_feature:]), axis=1)
116+
117+
# test data without attacked feature
118+
x_test_for_attack = np.delete(x_test_iris, attack_feature, 1)
119+
# only attacked feature
120+
x_test_feature = x_test_iris[:, attack_feature].copy().reshape(-1, 1)
121+
transform_feature(x_test_feature)
122+
123+
classifier = decision_tree_estimator()
124+
125+
attack = AttributeInferenceBlackBox(
126+
classifier, attack_feature=slice(attack_feature, attack_feature + 1), attack_model_type=model_type
127+
)
128+
# get original model's predictions
129+
x_train_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_train_iris)]).reshape(-1, 1)
130+
x_test_predictions = np.array([np.argmax(arr) for arr in classifier.predict(x_test_iris)]).reshape(-1, 1)
131+
# train attack model
132+
attack.fit(x_train)
133+
# infer attacked feature
134+
inferred_train = attack.infer(x_train_for_attack, pred=x_train_predictions, values=values)
135+
inferred_test = attack.infer(x_test_for_attack, pred=x_test_predictions, values=values)
136+
# check accuracy
137+
train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
138+
test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
86139
assert pytest.approx(0.8285, abs=0.12) == train_acc
87140
assert pytest.approx(0.8888, abs=0.18) == test_acc
88141

@@ -135,7 +188,7 @@ def transform_feature(x):
135188
train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
136189
test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
137190
assert pytest.approx(0.8285, abs=0.12) == train_acc
138-
assert pytest.approx(0.8888, abs=0.16) == test_acc
191+
assert pytest.approx(0.8888, abs=0.18) == test_acc
139192

140193
except ARTTestException as e:
141194
art_warning(e)

0 commit comments

Comments
 (0)