|
23 | 23 | import numpy as np |
24 | 24 | import torch.nn as nn |
25 | 25 | import torch.optim as optim |
| 26 | +from sklearn.tree import DecisionTreeClassifier |
26 | 27 |
|
27 | 28 | from art.attacks.inference.attribute_inference.black_box import AttributeInferenceBlackBox |
28 | 29 | from art.estimators.classification.pytorch import PyTorchClassifier |
29 | 30 | from art.estimators.estimator import BaseEstimator |
30 | 31 | from art.estimators.classification import ClassifierMixin |
| 32 | +from art.estimators.classification.scikitlearn import ScikitlearnDecisionTreeClassifier |
31 | 33 |
|
32 | 34 | from tests.attacks.utils import backend_test_classifier_type_check_fail |
33 | 35 | from tests.utils import ARTTestException |
@@ -143,5 +145,89 @@ def transform_feature(x): |
143 | 145 | art_warning(e) |
144 | 146 |
|
145 | 147 |
|
@pytest.mark.skipMlFramework("dl_frameworks")
def test_black_box_one_hot(art_warning, get_iris_dataset):
    """
    Run the black-box attribute inference attack on a one-hot encoded attacked feature.

    The attacked iris column is bucketed into the codes 0/1/2, one-hot encoded and spliced back in place of
    the original column. A scikit-learn decision tree is trained on that data, the attack is fitted on the
    training samples, and the inferred one-hot rows are compared with the true ones for train and test data.
    """
    try:
        attack_feature = 2  # petal length

        def bucketize_in_place(column):
            # Applied in this order so that the codes already written (2, then 1) never fall into the
            # value range of a later bucket.
            column[column > 0.5] = 2
            column[(column > 0.2) & (column <= 0.5)] = 1
            column[column <= 0.2] = 0

        def split_and_encode(samples):
            # Returns (samples without the attacked column, one-hot of the attacked column,
            # samples with the one-hot columns inserted at the attacked position).
            without_feature = np.delete(samples, attack_feature, 1)
            categories = samples[:, attack_feature].copy().reshape(-1, 1)
            bucketize_in_place(categories)
            one_hot = np.zeros((categories.size, int(categories.max()) + 1))
            one_hot[np.arange(categories.size), categories.reshape(1, -1).astype(int)] = 1
            with_one_hot = np.concatenate(
                (without_feature[:, :attack_feature], one_hot, without_feature[:, attack_feature:]),
                axis=1,
            )
            return without_feature, one_hot, with_one_hot

        def as_label_column(rows):
            # Index of the largest entry of every row, shaped as a single column.
            return np.array([np.argmax(row) for row in rows]).reshape(-1, 1)

        def exact_match_rate(inferred, expected):
            # Fraction of rows whose one-hot vector is reproduced exactly.
            return np.sum(np.all(inferred == expected, axis=1)) / len(inferred)

        (x_train_iris, y_train_iris), (x_test_iris, _) = get_iris_dataset

        x_train_for_attack, train_one_hot, x_train = split_and_encode(x_train_iris)
        y_train = as_label_column(y_train_iris)
        x_test_for_attack, test_one_hot, x_test = split_and_encode(x_test_iris)

        tree = DecisionTreeClassifier()
        tree.fit(x_train, y_train)
        classifier = ScikitlearnDecisionTreeClassifier(tree)

        # the one-hot encoded feature occupies three consecutive columns
        one_hot_columns = slice(attack_feature, attack_feature + 3)
        attack = AttributeInferenceBlackBox(classifier, attack_feature=one_hot_columns)

        # get original model's predictions
        x_train_predictions = as_label_column(classifier.predict(x_train))
        x_test_predictions = as_label_column(classifier.predict(x_test))

        # train attack model
        attack.fit(x_train)

        # infer attacked feature
        inferred_train = attack.infer(x_train_for_attack, x_train_predictions)
        inferred_test = attack.infer(x_test_for_attack, x_test_predictions)

        # check accuracy
        train_acc = exact_match_rate(inferred_train, train_one_hot)
        test_acc = exact_match_rate(inferred_test, test_one_hot)
        assert pytest.approx(0.9145, abs=0.03) == train_acc
        assert pytest.approx(0.9333, abs=0.03) == test_acc

    except ARTTestException as e:
        art_warning(e)
| 207 | + |
| 208 | + |
def test_errors(art_warning, tabular_dl_estimator_for_attack, get_iris_dataset):
    """
    Check that AttributeInferenceBlackBox raises ValueError for invalid configuration and inputs.

    Covers rejected `attack_feature` values at construction time, a rejected feature index at fit time, and
    rejected inputs to `fit` and `infer` on an attack built with default arguments.
    """
    try:
        classifier = tabular_dl_estimator_for_attack(AttributeInferenceBlackBox)
        (x_train, y_train), (_, y_test) = get_iris_dataset

        # rejected by the constructor: a string and a negative index
        for invalid_feature in ("a", -3):
            with pytest.raises(ValueError):
                AttributeInferenceBlackBox(classifier, attack_feature=invalid_feature)

        # accepted by the constructor but rejected once training data is seen
        # (presumably because index 8 exceeds the number of iris features -- confirm against the attack)
        attack = AttributeInferenceBlackBox(classifier, attack_feature=8)
        with pytest.raises(ValueError):
            attack.fit(x_train)

        attack = AttributeInferenceBlackBox(classifier)

        # training data with one column removed
        x_train_missing_column = np.delete(x_train, 1, 1)
        with pytest.raises(ValueError):
            attack.fit(x_train_missing_column)

        # full training samples passed to infer, first with test labels, then with train labels
        for labels in (y_test, y_train):
            with pytest.raises(ValueError):
                attack.infer(x_train, labels)
    except ARTTestException as e:
        art_warning(e)
| 230 | + |
| 231 | + |
def test_classifier_type_check_fail():
    """Run the shared classifier type-check failure test for AttributeInferenceBlackBox."""
    estimator_types = (BaseEstimator, ClassifierMixin)
    backend_test_classifier_type_check_fail(AttributeInferenceBlackBox, estimator_types)
0 commit comments