
Commit 79cce25

Merge pull request #604 from Trusted-AI/cleanse
Implement Neural Cleanse Defense
2 parents 24b1633 + 59cf348

File tree

15 files changed: +1750 −1 lines changed


art/defences/transformer/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -2,4 +2,5 @@
 Module implementing transformer-based defences against adversarial attacks.
 """
 from art.defences.transformer.transformer import Transformer
-from art.defences.transformer.defensive_distillation import DefensiveDistillation
+from art.defences.transformer.evasion.defensive_distillation import DefensiveDistillation
+from art.defences.transformer.poison.neural_cleanse import NeuralCleanse
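
With this hunk, DefensiveDistillation moves into the new evasion subpackage while staying re-exported from art.defences.transformer, and NeuralCleanse becomes importable alongside it. A quick check against a build of this commit (a sketch; it only uses the imports shown in the __init__.py above):

# Sketch only: both import paths follow the package __init__ above; run against a build of this commit.
from art.defences.transformer import DefensiveDistillation, NeuralCleanse
from art.defences.transformer.poison.neural_cleanse import NeuralCleanse as PoisonNeuralCleanse

# The top-level name is a re-export of the subpackage class, so both resolve to the same object.
assert NeuralCleanse is PoisonNeuralCleanse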
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
from art.defences.transformer.evasion.defensive_distillation import DefensiveDistillation
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
from art.defences.transformer.poison.neural_cleanse import NeuralCleanse
Lines changed: 136 additions & 0 deletions
@@ -0,0 +1,136 @@
# MIT License
#
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2020
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
This module implements Neural Cleanse (Wang et al., 2019).

| Paper link: http://people.cs.uchicago.edu/~ravenben/publications/abstracts/backdoor-sp19.html
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
from typing import Optional, TYPE_CHECKING, Union

import numpy as np

from art.defences.transformer.transformer import Transformer
from art.estimators.certification.neural_cleanse.keras import KerasNeuralCleanse
from art.estimators.classification.keras import KerasClassifier

if TYPE_CHECKING:
    from art.utils import CLASSIFIER_TYPE

logger = logging.getLogger(__name__)


class NeuralCleanse(Transformer):
    """
    Implementation of methods in Neural Cleanse: Identifying and Mitigating Backdoor Attacks in Neural Networks.
    Wang et al. (2019).

    | Paper link: https://people.cs.uchicago.edu/~ravenben/publications/pdf/backdoor-sp19.pdf
    """

    params = [
        "steps",
        "init_cost",
        "norm",
        "learning_rate",
        "attack_success_threshold",
        "patience",
        "early_stop",
        "early_stop_threshold",
        "early_stop_patience",
        "cost_multiplier",
        "batch_size",
    ]

    def __init__(self, classifier: "CLASSIFIER_TYPE") -> None:
        """
        Create an instance of the Neural Cleanse defence.

        :param classifier: A trained classifier.
        """
        super().__init__(classifier=classifier)
        self._check_params()

    def __call__(
        self,
        transformed_classifier: "CLASSIFIER_TYPE",
        steps: int = 1000,
        init_cost: float = 1e-3,
        norm: Union[int, float] = 2,
        learning_rate: float = 0.1,
        attack_success_threshold: float = 0.99,
        patience: int = 5,
        early_stop: bool = True,
        early_stop_threshold: float = 0.99,
        early_stop_patience: int = 10,
        cost_multiplier: float = 1.5,
        batch_size: int = 32,
    ) -> KerasNeuralCleanse:
        """
        Returns a new classifier with an implementation of the methods in Neural Cleanse: Identifying and Mitigating
        Backdoor Attacks in Neural Networks. Wang et al. (2019).

        Namely, the new classifier has a new method mitigate(). This can also affect the predict() function.

        | Paper link: https://people.cs.uchicago.edu/~ravenben/publications/pdf/backdoor-sp19.pdf

        :param transformed_classifier: An ART classifier.
        :param steps: The maximum number of steps to run the Neural Cleanse optimization.
        :param init_cost: The initial value for the cost tensor in the Neural Cleanse optimization.
        :param norm: The norm to use for the Neural Cleanse optimization; can be 1, 2, or np.inf.
        :param learning_rate: The learning rate for the Neural Cleanse optimization.
        :param attack_success_threshold: The threshold at which the generated backdoor is successful enough to stop
               the Neural Cleanse optimization.
        :param patience: How long to wait before changing the cost multiplier in the Neural Cleanse optimization.
        :param early_stop: Whether or not to allow early stopping in the Neural Cleanse optimization.
        :param early_stop_threshold: How close values need to come to the max value to start counting early stop.
        :param early_stop_patience: How long to wait to determine early stopping in the Neural Cleanse optimization.
        :param cost_multiplier: How much to change the cost in the Neural Cleanse optimization.
        :param batch_size: The batch size for optimizations in the Neural Cleanse optimization.
        """
        import keras

        if isinstance(transformed_classifier, KerasClassifier) and keras.__version__ == "2.2.4":
            transformed_classifier = KerasNeuralCleanse(
                model=transformed_classifier.model,
                steps=steps,
                init_cost=init_cost,
                norm=norm,
                learning_rate=learning_rate,
                attack_success_threshold=attack_success_threshold,
                patience=patience,
                early_stop=early_stop,
                early_stop_threshold=early_stop_threshold,
                early_stop_patience=early_stop_patience,
                cost_multiplier=cost_multiplier,
                batch_size=batch_size,
            )
            return transformed_classifier
        else:
            raise NotImplementedError("Only Keras classifiers (v2.2.4) are supported for this defence.")

    def fit(self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> None:
        """
        No parameters to learn for this method; do nothing.
        """
        raise NotImplementedError

    def _check_params(self) -> None:
        if not isinstance(self.classifier, KerasClassifier):
            raise NotImplementedError("Only Keras classifiers are supported for this defence.")
Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
# MIT License
#
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2020
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
This module implements a mixin to be added to classifiers so that they may abstain from classification.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import logging

import numpy as np

from art.estimators.classification.classifier import ClassifierMixin

logger = logging.getLogger(__name__)


class AbstainPredictorMixin(ClassifierMixin):
    """
    A mixin class that gives classifiers the ability to abstain.
    """

    def __init__(self, **kwargs):
        """
        Creates a predictor that can abstain from predictions.
        """
        super().__init__(**kwargs)

    def abstain(self) -> np.ndarray:
        """
        Abstain from a prediction.

        :return: A numpy array of zeros.
        """
        return np.zeros(self.nb_classes)
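
For context, here is a stand-alone toy sketch of how an abstaining predictor is meant to behave (illustrative only; it deliberately avoids the real ART class hierarchy, since AbstainPredictorMixin relies on ClassifierMixin internals such as nb_classes): predictions for inputs flagged as suspicious are replaced by the all-zeros abstention vector returned by abstain().

# Toy stand-in mirroring the abstain() contract above: a zero vector means "no prediction".
import numpy as np

class ToyAbstainingClassifier:
    def __init__(self, nb_classes: int):
        self.nb_classes = nb_classes

    def abstain(self) -> np.ndarray:
        # Same behaviour as AbstainPredictorMixin.abstain(): one zero per class.
        return np.zeros(self.nb_classes)

    def predict(self, x: np.ndarray, suspected_backdoor: np.ndarray) -> np.ndarray:
        # Fake uniform scores stand in for a real model's output here.
        scores = np.full((x.shape[0], self.nb_classes), 1.0 / self.nb_classes)
        # Replace predictions on flagged inputs with the abstention vector.
        scores[suspected_backdoor] = self.abstain()
        return scores

clf = ToyAbstainingClassifier(nb_classes=10)
x = np.random.rand(4, 28, 28, 1)
flags = np.array([False, True, False, True])
print(clf.predict(x, flags))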
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
"""
Neural cleanse estimators.
"""
from art.estimators.certification.neural_cleanse.neural_cleanse import NeuralCleanseMixin
from art.estimators.certification.neural_cleanse.keras import KerasNeuralCleanse
