Skip to content

Commit 101b339

Browse files
authored
Merge pull request #1598 from abigailgold/dev_1.10.0_regression_fix
Correct handling of labels for regression models in attribute attacks
2 parents 77643d4 + da6cc39 commit 101b339

File tree

4 files changed

+138
-12
lines changed

art/attacks/inference/attribute_inference/black_box.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
153153
# get model's predictions for x
154154
if ClassifierMixin in type(self.estimator).__mro__:
155155
predictions = np.array([np.argmax(arr) for arr in self.estimator.predict(x)]).reshape(-1, 1)
156+
if y is not None:
157+
y = check_and_transform_label_format(y, return_one_hot=True)
156158
else: # Regression model
157159
if self.scale_range is not None:
158160
predictions = minmax_scale(self.estimator.predict(x).reshape(-1, 1), feature_range=self.scale_range)
@@ -162,6 +164,8 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
162164
predictions = self.estimator.predict(x).reshape(-1, 1) * self.prediction_normal_factor
163165
if y is not None:
164166
y = y * self.prediction_normal_factor
167+
if y is not None:
168+
y = y.reshape(-1, 1)
165169

166170
# get vector of attacked feature
167171
y_attack = x[:, self.attack_feature]
@@ -176,7 +180,6 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
176180
x_train = np.concatenate((np.delete(x, self.attack_feature, 1), predictions), axis=1).astype(np.float32)
177181

178182
if y is not None:
179-
y = check_and_transform_label_format(y, return_one_hot=True)
180183
x_train = np.concatenate((x_train, y), axis=1)
181184

182185
# train attack model
@@ -227,11 +230,14 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
227230
x_test = np.concatenate((x, pred * self.prediction_normal_factor), axis=1).astype(np.float32)
228231
if y is not None:
229232
y = y * self.prediction_normal_factor
233+
if y is not None:
234+
y = y.reshape(-1, 1)
230235
else:
231236
x_test = np.concatenate((x, pred), axis=1).astype(np.float32)
237+
if y is not None:
238+
y = check_and_transform_label_format(y, return_one_hot=True)
232239

233240
if y is not None:
234-
y = check_and_transform_label_format(y, return_one_hot=True)
235241
x_test = np.concatenate((x_test, y), axis=1)
236242

237243
predictions = self.attack_model.predict(x_test).astype(np.float32)

art/attacks/inference/attribute_inference/true_label_baseline.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def __init__(
5959
attack_model_type: str = "nn",
6060
attack_model: Optional["CLASSIFIER_TYPE"] = None,
6161
attack_feature: Union[int, slice] = 0,
62+
is_regression: Optional[bool] = False,
6263
scale_range: Optional[slice] = None,
6364
prediction_normal_factor: float = 1,
6465
):
@@ -72,11 +73,12 @@ def __init__(
7273
:param attack_feature: The index of the feature to be attacked or a slice representing multiple indexes in
7374
case of a one-hot encoded feature.
7475
case of a one-hot encoded feature.
76+
:param is_regression: Whether the model is a regression model. Default is False (classification).
7577
:param scale_range: If supplied, the class labels (both true and predicted) will be scaled to the given range.
76-
Only applicable when `estimator` is a regressor.
78+
Only applicable when `is_regression` is True.
7779
:param prediction_normal_factor: If supplied, the class labels (both true and predicted) are multiplied by the
7880
factor when used as inputs to the attack-model. Only applicable when
79-
`estimator` is a regressor and if `scale_range` is not supplied.
81+
`is_regression` is True and if `scale_range` is not supplied.
8082
"""
8183
super().__init__(estimator=None, attack_feature=attack_feature)
8284

@@ -119,6 +121,7 @@ def __init__(
119121

120122
self.prediction_normal_factor = prediction_normal_factor
121123
self.scale_range = scale_range
124+
self.is_regression = is_regression
122125
self._check_params()
123126
self.attack_feature = get_feature_index(self.attack_feature)
124127

@@ -146,11 +149,14 @@ def fit(self, x: np.ndarray, y: np.ndarray) -> None:
146149
raise ValueError("None value detected.")
147150

148151
# create training set for attack model
149-
if self.scale_range is not None:
150-
normalized_labels = minmax_scale(y, feature_range=self.scale_range)
152+
if self.is_regression:
153+
if self.scale_range is not None:
154+
normalized_labels = minmax_scale(y, feature_range=self.scale_range)
155+
else:
156+
normalized_labels = y * self.prediction_normal_factor
157+
normalized_labels = normalized_labels.reshape(-1, 1)
151158
else:
152-
normalized_labels = y * self.prediction_normal_factor
153-
normalized_labels = check_and_transform_label_format(normalized_labels, return_one_hot=True)
159+
normalized_labels = check_and_transform_label_format(y, return_one_hot=True)
154160
x_train = np.concatenate((np.delete(x, self.attack_feature, 1), normalized_labels), axis=1).astype(np.float32)
155161

156162
# train attack model
@@ -179,11 +185,14 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
179185
if values is not None:
180186
self._values = values
181187

182-
if self.scale_range is not None:
183-
normalized_labels = minmax_scale(y, feature_range=self.scale_range)
188+
if self.is_regression:
189+
if self.scale_range is not None:
190+
normalized_labels = minmax_scale(y, feature_range=self.scale_range)
191+
else:
192+
normalized_labels = y * self.prediction_normal_factor
193+
normalized_labels = normalized_labels.reshape(-1, 1)
184194
else:
185-
normalized_labels = y * self.prediction_normal_factor
186-
normalized_labels = check_and_transform_label_format(normalized_labels, return_one_hot=True)
195+
normalized_labels = check_and_transform_label_format(y, return_one_hot=True)
187196
x_test = np.concatenate((x, normalized_labels), axis=1).astype(np.float32)
188197

189198
predictions = self.attack_model.predict(x_test).astype(np.float32)

tests/attacks/inference/attribute_inference/test_black_box.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,77 @@ def transform_feature(x):
314314
art_warning(e)
315315

316316

317+
@pytest.mark.skip_framework("dl_frameworks")
318+
@pytest.mark.parametrize("model_type", ["nn", "rf"])
319+
def test_black_box_regressor_label(art_warning, get_diabetes_dataset, model_type):
320+
try:
321+
attack_feature = 0 # age
322+
323+
bins = [
324+
-0.96838121,
325+
-0.77154309,
326+
-0.57470497,
327+
-0.37786684,
328+
-0.18102872,
329+
0.0158094,
330+
0.21264752,
331+
0.40948564,
332+
0.60632376,
333+
0.80316188,
334+
1.0,
335+
]
336+
337+
# need to transform attacked feature into categorical
338+
def transform_feature(x):
339+
for i in range(len(bins) - 1):
340+
x[(x >= bins[i]) & (x <= bins[i + 1])] = i
341+
342+
values = list(range(len(bins) - 1))
343+
344+
(x_train_diabetes, y_train_diabetes), (x_test_diabetes, y_test_diabetes) = get_diabetes_dataset
345+
# training data without attacked feature
346+
x_train_for_attack = np.delete(x_train_diabetes, attack_feature, 1)
347+
# only attacked feature
348+
x_train_feature = x_train_diabetes[:, attack_feature].copy().reshape(-1, 1)
349+
transform_feature(x_train_feature)
350+
# training data with attacked feature (after transformation)
351+
x_train = np.concatenate((x_train_for_attack[:, :attack_feature], x_train_feature), axis=1)
352+
x_train = np.concatenate((x_train, x_train_for_attack[:, attack_feature:]), axis=1)
353+
354+
# test data without attacked feature
355+
x_test_for_attack = np.delete(x_test_diabetes, attack_feature, 1)
356+
# only attacked feature
357+
x_test_feature = x_test_diabetes[:, attack_feature].copy().reshape(-1, 1)
358+
transform_feature(x_test_feature)
359+
360+
from sklearn import linear_model
361+
362+
regr_model = linear_model.LinearRegression()
363+
regr_model.fit(x_train_diabetes, y_train_diabetes)
364+
regressor = ScikitlearnRegressor(regr_model)
365+
366+
attack = AttributeInferenceBlackBox(
367+
regressor, attack_feature=attack_feature, prediction_normal_factor=1 / 250, attack_model_type=model_type
368+
)
369+
# get original model's predictions
370+
x_train_predictions = regressor.predict(x_train_diabetes).reshape(-1, 1)
371+
x_test_predictions = regressor.predict(x_test_diabetes).reshape(-1, 1)
372+
# train attack model
373+
attack.fit(x_train, y=y_train_diabetes)
374+
# infer attacked feature
375+
inferred_train = attack.infer(x_train_for_attack, pred=x_train_predictions, values=values, y=y_train_diabetes)
376+
inferred_test = attack.infer(x_test_for_attack, pred=x_test_predictions, values=values, y=y_test_diabetes)
377+
# check accuracy
378+
train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
379+
test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
380+
381+
assert pytest.approx(0.0258, abs=0.12) == train_acc
382+
assert pytest.approx(0.0375, abs=0.12) == test_acc
383+
384+
except ARTTestException as e:
385+
art_warning(e)
386+
387+
317388
@pytest.mark.skip_framework("dl_frameworks")
318389
def test_black_box_with_model(art_warning, decision_tree_estimator, get_iris_dataset):
319390
try:

tests/attacks/inference/attribute_inference/test_true_label_baseline.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,46 @@ def transform_feature(x):
183183
art_warning(e)
184184

185185

186+
@pytest.mark.skip_framework("dl_frameworks")
187+
@pytest.mark.parametrize("model_type", ["nn", "rf"])
188+
def test_true_label_baseline_regression(art_warning, get_diabetes_dataset, model_type):
189+
try:
190+
attack_feature = 1 # sex
191+
192+
(x_train, y_train), (x_test, y_test) = get_diabetes_dataset
193+
# training data without attacked feature
194+
x_train_for_attack = np.delete(x_train, attack_feature, 1)
195+
# only attacked feature
196+
x_train_feature = x_train[:, attack_feature].copy().reshape(-1, 1)
197+
198+
# test data without attacked feature
199+
x_test_for_attack = np.delete(x_test, attack_feature, 1)
200+
# only attacked feature
201+
x_test_feature = x_test[:, attack_feature].copy().reshape(-1, 1)
202+
203+
baseline_attack = AttributeInferenceBaselineTrueLabel(
204+
attack_feature=attack_feature, attack_model_type=model_type, is_regression=True
205+
)
206+
# train attack model
207+
baseline_attack.fit(x_train, y_train)
208+
# infer attacked feature
209+
baseline_inferred_train = baseline_attack.infer(x_train_for_attack, y=y_train)
210+
baseline_inferred_test = baseline_attack.infer(x_test_for_attack, y=y_test)
211+
# check accuracy
212+
baseline_train_acc = np.sum(baseline_inferred_train == x_train_feature.reshape(1, -1)) / len(
213+
baseline_inferred_train
214+
)
215+
baseline_test_acc = np.sum(baseline_inferred_test == x_test_feature.reshape(1, -1)) / len(
216+
baseline_inferred_test
217+
)
218+
219+
assert 0.6 <= baseline_train_acc
220+
assert 0.6 <= baseline_test_acc
221+
222+
except ARTTestException as e:
223+
art_warning(e)
224+
225+
186226
def test_check_params(art_warning):
187227
try:
188228
with pytest.raises(ValueError):

0 commit comments

Comments
 (0)