Skip to content

Commit 4bdf0c8

Browse files
committed
Fix handling of true labels with regression models
Signed-off-by: abigailt <[email protected]>
1 parent 9b92130 commit 4bdf0c8

File tree

4 files changed

+136
-12
lines changed

4 files changed

+136
-12
lines changed

art/attacks/inference/attribute_inference/black_box.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,8 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
150150
# get model's predictions for x
151151
if ClassifierMixin in type(self.estimator).__mro__:
152152
predictions = np.array([np.argmax(arr) for arr in self.estimator.predict(x)]).reshape(-1, 1)
153+
if y is not None:
154+
y = check_and_transform_label_format(y, return_one_hot=True)
153155
else: # Regression model
154156
if self.scale_range is not None:
155157
predictions = minmax_scale(self.estimator.predict(x).reshape(-1, 1), feature_range=self.scale_range)
@@ -159,6 +161,7 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
159161
predictions = self.estimator.predict(x).reshape(-1, 1) * self.prediction_normal_factor
160162
if y is not None:
161163
y = y * self.prediction_normal_factor
164+
y = y.reshape(-1, 1)
162165

163166
# get vector of attacked feature
164167
y_attack = x[:, self.attack_feature]
@@ -173,7 +176,6 @@ def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> None:
173176
x_train = np.concatenate((np.delete(x, self.attack_feature, 1), predictions), axis=1).astype(np.float32)
174177

175178
if y is not None:
176-
y = check_and_transform_label_format(y, return_one_hot=True)
177179
x_train = np.concatenate((x_train, y), axis=1)
178180

179181
# train attack model
@@ -224,11 +226,13 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
224226
x_test = np.concatenate((x, pred * self.prediction_normal_factor), axis=1).astype(np.float32)
225227
if y is not None:
226228
y = y * self.prediction_normal_factor
229+
y = y.reshape(-1, 1)
227230
else:
228231
x_test = np.concatenate((x, pred), axis=1).astype(np.float32)
232+
if y is not None:
233+
y = check_and_transform_label_format(y, return_one_hot=True)
229234

230235
if y is not None:
231-
y = check_and_transform_label_format(y, return_one_hot=True)
232236
x_test = np.concatenate((x_test, y), axis=1)
233237

234238
predictions = self.attack_model.predict(x_test).astype(np.float32)

art/attacks/inference/attribute_inference/true_label_baseline.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def __init__(
5353
attack_model_type: str = "nn",
5454
attack_model: Optional["CLASSIFIER_TYPE"] = None,
5555
attack_feature: Union[int, slice] = 0,
56+
is_regression: Optional[bool] = False,
5657
scale_range: Optional[slice] = None,
5758
prediction_normal_factor: float = 1,
5859
):
@@ -66,11 +67,12 @@ def __init__(
6667
:param attack_feature: The index of the feature to be attacked or a slice representing multiple indexes in
6768
case of a one-hot encoded feature.
6869
case of a one-hot encoded feature.
70+
:param is_regression: Whether the model is a regression model. Default is False (classification).
6971
:param scale_range: If supplied, the class labels (both true and predicted) will be scaled to the given range.
70-
Only applicable when `estimator` is a regressor.
72+
Only applicable when `is_regression` is True.
7173
:param prediction_normal_factor: If supplied, the class labels (both true and predicted) are multiplied by the
7274
factor when used as inputs to the attack-model. Only applicable when
73-
`estimator` is a regressor and if `scale_range` is not supplied.
75+
`is_regression` is True and if `scale_range` is not supplied.
7476
"""
7577
super().__init__(estimator=None, attack_feature=attack_feature)
7678

@@ -118,6 +120,7 @@ def __init__(
118120

119121
self.prediction_normal_factor = prediction_normal_factor
120122
self.scale_range = scale_range
123+
self.is_regression = is_regression
121124
self._check_params()
122125

123126
def fit(self, x: np.ndarray, y: np.ndarray) -> None:
@@ -144,11 +147,14 @@ def fit(self, x: np.ndarray, y: np.ndarray) -> None:
144147
raise ValueError("None value detected.")
145148

146149
# create training set for attack model
147-
if self.scale_range is not None:
148-
normalized_labels = minmax_scale(y, feature_range=self.scale_range)
150+
if self.is_regression:
151+
if self.scale_range is not None:
152+
normalized_labels = minmax_scale(y, feature_range=self.scale_range)
153+
else:
154+
normalized_labels = y * self.prediction_normal_factor
155+
normalized_labels = normalized_labels.reshape(-1, 1)
149156
else:
150-
normalized_labels = y * self.prediction_normal_factor
151-
normalized_labels = check_and_transform_label_format(normalized_labels, return_one_hot=True)
157+
normalized_labels = check_and_transform_label_format(y, return_one_hot=True)
152158
x_train = np.concatenate((np.delete(x, self.attack_feature, 1), normalized_labels), axis=1).astype(np.float32)
153159

154160
# train attack model
@@ -177,11 +183,14 @@ def infer(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> np.n
177183
if values is not None:
178184
self._values = values
179185

180-
if self.scale_range is not None:
181-
normalized_labels = minmax_scale(y, feature_range=self.scale_range)
186+
if self.is_regression:
187+
if self.scale_range is not None:
188+
normalized_labels = minmax_scale(y, feature_range=self.scale_range)
189+
else:
190+
normalized_labels = y * self.prediction_normal_factor
191+
normalized_labels = normalized_labels.reshape(-1, 1)
182192
else:
183-
normalized_labels = y * self.prediction_normal_factor
184-
normalized_labels = check_and_transform_label_format(normalized_labels, return_one_hot=True)
193+
normalized_labels = check_and_transform_label_format(y, return_one_hot=True)
185194
x_test = np.concatenate((x, normalized_labels), axis=1).astype(np.float32)
186195

187196
predictions = self.attack_model.predict(x_test).astype(np.float32)

tests/attacks/inference/attribute_inference/test_black_box.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,77 @@ def transform_feature(x):
261261
art_warning(e)
262262

263263

264+
@pytest.mark.skip_framework("dl_frameworks")
265+
@pytest.mark.parametrize("model_type", ["nn", "rf"])
266+
def test_black_box_regressor_label(art_warning, get_diabetes_dataset, model_type):
# Runs AttributeInferenceBlackBox against a scikit-learn LinearRegression wrapped in ScikitlearnRegressor,
# supplying the true labels (`y`) to both `fit` and `infer`. Parametrized over the attack model types
# "nn" and "rf".
267+
try:
268+
attack_feature = 0 # age
269+
270+
# 11 bin edges -> len(bins) - 1 == 10 categories for the attacked feature
bins = [
271+
-0.96838121,
272+
-0.77154309,
273+
-0.57470497,
274+
-0.37786684,
275+
-0.18102872,
276+
0.0158094,
277+
0.21264752,
278+
0.40948564,
279+
0.60632376,
280+
0.80316188,
281+
1.0,
282+
]
283+
284+
# the attacked feature is continuous; replace each value (in place) with the index of the bin it falls into
# NOTE(review): the replacement happens in place while the loop is still running, so entries already set to
# a bin index are compared against later bins as well. Index 0 lies inside [bins[4], bins[5]] and index 1
# lies inside [bins[9], bins[10]], so those entries look like they end up re-labelled as 4 and 9 - confirm
# whether this is intended (the reference accuracies asserted below were presumably obtained with it).
285+
def transform_feature(x):
286+
for i in range(len(bins) - 1):
287+
x[(x >= bins[i]) & (x <= bins[i + 1])] = i
288+
289+
# candidate values of the attacked feature, passed to `infer` below
values = list(range(len(bins) - 1))
290+
291+
(x_train_diabetes, y_train_diabetes), (x_test_diabetes, y_test_diabetes) = get_diabetes_dataset
292+
# training data without attacked feature
293+
x_train_for_attack = np.delete(x_train_diabetes, attack_feature, 1)
294+
# only attacked feature
295+
x_train_feature = x_train_diabetes[:, attack_feature].copy().reshape(-1, 1)
296+
transform_feature(x_train_feature)
297+
# training data with attacked feature (after transformation), re-inserted at column `attack_feature`
298+
x_train = np.concatenate((x_train_for_attack[:, :attack_feature], x_train_feature), axis=1)
299+
x_train = np.concatenate((x_train, x_train_for_attack[:, attack_feature:]), axis=1)
300+
301+
# test data without attacked feature
302+
x_test_for_attack = np.delete(x_test_diabetes, attack_feature, 1)
303+
# only attacked feature
304+
x_test_feature = x_test_diabetes[:, attack_feature].copy().reshape(-1, 1)
305+
transform_feature(x_test_feature)
306+
307+
from sklearn import linear_model
308+
309+
# the target regressor is fitted on the original (untransformed) features
regr_model = linear_model.LinearRegression()
310+
regr_model.fit(x_train_diabetes, y_train_diabetes)
311+
regressor = ScikitlearnRegressor(regr_model)
312+
313+
attack = AttributeInferenceBlackBox(
314+
regressor, attack_feature=attack_feature, prediction_normal_factor=1 / 250, attack_model_type=model_type
315+
)
316+
# get original model's predictions
317+
x_train_predictions = regressor.predict(x_train_diabetes).reshape(-1, 1)
318+
x_test_predictions = regressor.predict(x_test_diabetes).reshape(-1, 1)
319+
# train attack model on the data containing the transformed attacked feature, together with the true labels
320+
attack.fit(x_train, y=y_train_diabetes)
321+
# infer attacked feature from the remaining features, the model's predictions and the true labels
322+
inferred_train = attack.infer(x_train_for_attack, pred=x_train_predictions, values=values, y=y_train_diabetes)
323+
inferred_test = attack.infer(x_test_for_attack, pred=x_test_predictions, values=values, y=y_test_diabetes)
324+
# check accuracy: fraction of inferred values equal to the transformed attacked feature
325+
train_acc = np.sum(inferred_train == x_train_feature.reshape(1, -1)) / len(inferred_train)
326+
test_acc = np.sum(inferred_test == x_test_feature.reshape(1, -1)) / len(inferred_test)
327+
328+
# accuracies must lie within an absolute tolerance of 0.12 of the reference values
assert pytest.approx(0.0258, abs=0.12) == train_acc
329+
assert pytest.approx(0.0375, abs=0.12) == test_acc
330+
331+
# an ARTTestException is handed to the `art_warning` fixture instead of propagating
except ARTTestException as e:
332+
art_warning(e)
333+
334+
264335
@pytest.mark.skip_framework("dl_frameworks")
265336
def test_black_box_with_model(art_warning, decision_tree_estimator, get_iris_dataset):
266337
try:

tests/attacks/inference/attribute_inference/test_true_label_baseline.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,46 @@ def transform_feature(x):
133133
art_warning(e)
134134

135135

136+
@pytest.mark.skip_framework("dl_frameworks")
137+
@pytest.mark.parametrize("model_type", ["nn", "rf"])
138+
def test_true_label_baseline_regression(art_warning, get_diabetes_dataset, model_type):
# Exercises AttributeInferenceBaselineTrueLabel with `is_regression=True` on the diabetes dataset fixture,
# parametrized over the attack model types "nn" and "rf".
139+
try:
140+
attack_feature = 1 # sex
141+
142+
(x_train, y_train), (x_test, y_test) = get_diabetes_dataset
143+
# training data without attacked feature
144+
x_train_for_attack = np.delete(x_train, attack_feature, 1)
145+
# only attacked feature (used as-is; no binning is applied in this test)
146+
x_train_feature = x_train[:, attack_feature].copy().reshape(-1, 1)
147+
148+
# test data without attacked feature
149+
x_test_for_attack = np.delete(x_test, attack_feature, 1)
150+
# only attacked feature
151+
x_test_feature = x_test[:, attack_feature].copy().reshape(-1, 1)
152+
153+
baseline_attack = AttributeInferenceBaselineTrueLabel(
154+
attack_feature=attack_feature, attack_model_type=model_type, is_regression=True
155+
)
156+
# train attack model on the full training data (attacked feature included) and the true labels
157+
baseline_attack.fit(x_train, y_train)
158+
# infer attacked feature from the remaining features and the true labels
159+
baseline_inferred_train = baseline_attack.infer(x_train_for_attack, y=y_train)
160+
baseline_inferred_test = baseline_attack.infer(x_test_for_attack, y=y_test)
161+
# check accuracy: fraction of inferred values equal to the real attacked feature
162+
baseline_train_acc = np.sum(baseline_inferred_train == x_train_feature.reshape(1, -1)) / len(
163+
baseline_inferred_train
164+
)
165+
baseline_test_acc = np.sum(baseline_inferred_test == x_test_feature.reshape(1, -1)) / len(
166+
baseline_inferred_test
167+
)
168+
169+
# lower bounds only: at least 0.7 on the training split and 0.6 on the test split
assert 0.7 <= baseline_train_acc
170+
assert 0.6 <= baseline_test_acc
171+
172+
# an ARTTestException is handed to the `art_warning` fixture instead of propagating
except ARTTestException as e:
173+
art_warning(e)
174+
175+
136176
@pytest.mark.framework_agnostic
137177
def test_check_params(art_warning):
138178
try:

0 commit comments

Comments
 (0)