Commit 2dc8ad6

Merge pull request #1487 from keykholt/hidden_trigger_backdoor2
Hidden Trigger Backdoor Poisoning Attack Implementation
2 parents 5a213aa + 7b63987 commit 2dc8ad6

19 files changed: +3176 -18 lines

art/attacks/poisoning/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -7,4 +7,7 @@
 from art.attacks.poisoning.adversarial_embedding_attack import PoisoningAttackAdversarialEmbedding
 from art.attacks.poisoning.clean_label_backdoor_attack import PoisoningAttackCleanLabelBackdoor
 from art.attacks.poisoning.bullseye_polytope_attack import BullseyePolytopeAttackPyTorch
+from art.attacks.poisoning.hidden_trigger_backdoor.hidden_trigger_backdoor import HiddenTriggerBackdoor
+from art.attacks.poisoning.hidden_trigger_backdoor.hidden_trigger_backdoor_pytorch import HiddenTriggerBackdoorPyTorch
+from art.attacks.poisoning.hidden_trigger_backdoor.hidden_trigger_backdoor_keras import HiddenTriggerBackdoorKeras
 from art.attacks.poisoning.gradient_matching_attack import GradientMatchingAttack
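
With these exports in place, the new attack and its framework-specific backends can be imported directly from the poisoning subpackage:

from art.attacks.poisoning import HiddenTriggerBackdoor, HiddenTriggerBackdoorPyTorch, HiddenTriggerBackdoorKeras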

art/attacks/poisoning/hidden_trigger_backdoor/__init__.py

Whitespace-only changes.
art/attacks/poisoning/hidden_trigger_backdoor/hidden_trigger_backdoor.py

Lines changed: 230 additions & 0 deletions

@@ -0,0 +1,230 @@
+# MIT License
+#
+# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2022
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""
+This module implements the Hidden Trigger Backdoor attack on neural networks.
+
+| Paper link: https://arxiv.org/abs/1910.00033
+"""
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import logging
+from typing import List, Optional, Tuple, Union, TYPE_CHECKING
+
+import numpy as np
+
+from art.attacks.attack import PoisoningAttackWhiteBox
+from art.attacks.poisoning.backdoor_attack import PoisoningAttackBackdoor
+from art.estimators import BaseEstimator, NeuralNetworkMixin
+from art.estimators.classification.classifier import ClassifierMixin
+from art.estimators.classification.pytorch import PyTorchClassifier
+from art.estimators.classification.keras import KerasClassifier
+from art.estimators.classification.tensorflow import TensorFlowV2Classifier
+
+from art.attacks.poisoning.hidden_trigger_backdoor.hidden_trigger_backdoor_pytorch import (
+    HiddenTriggerBackdoorPyTorch,
+)
+from art.attacks.poisoning.hidden_trigger_backdoor.hidden_trigger_backdoor_keras import (
+    HiddenTriggerBackdoorKeras,
+)
+
+if TYPE_CHECKING:
+    from art.utils import CLASSIFIER_NEURALNETWORK_TYPE
+
+logger = logging.getLogger(__name__)
+
+
+class HiddenTriggerBackdoor(PoisoningAttackWhiteBox):
+    """
+    Implementation of the Hidden Trigger Backdoor attack by Saha et al. (2019),
+    "Hidden Trigger Backdoor Attacks".
+
+    | Paper link: https://arxiv.org/abs/1910.00033
+    """
+
+    attack_params = PoisoningAttackWhiteBox.attack_params + [
+        "target",
+        "backdoor",
+        "feature_layer",
+        "source",
+        "eps",
+        "learning_rate",
+        "decay_coeff",
+        "decay_iter",
+        "stopping_threshold",
+        "max_iter",
+        "poison_percent",
+        "batch_size",
+        "verbose",
+        "print_iter",
+    ]
+
+    _estimator_requirements = (BaseEstimator, NeuralNetworkMixin, ClassifierMixin)
+
+    def __init__(
+        self,
+        classifier: "CLASSIFIER_NEURALNETWORK_TYPE",
+        target: np.ndarray,
+        source: np.ndarray,
+        feature_layer: Union[str, int],
+        backdoor: PoisoningAttackBackdoor,
+        eps: float = 0.1,
+        learning_rate: float = 0.001,
+        decay_coeff: float = 0.95,
+        decay_iter: Union[int, List[int]] = 2000,
+        stopping_threshold: float = 10,
+        max_iter: int = 5000,
+        batch_size: int = 100,
+        poison_percent: float = 0.1,
+        is_index: bool = False,
+        verbose: bool = True,
+        print_iter: int = 100,
+    ) -> None:
+        """
+        Creates a new Hidden Trigger Backdoor poisoning attack.
+
+        :param classifier: A trained neural network classifier.
+        :param target: The target class/indices to poison. Triggers added to inputs not in the target class will
+                       result in misclassification into the target class. If an int, it represents a label.
+                       Otherwise, it is an array of indices.
+        :param source: The class/indices which will have a trigger added to cause misclassification.
+                       If an int, it represents a label. Otherwise, it is an array of indices.
+        :param feature_layer: The name of the feature representation layer.
+        :param backdoor: A PoisoningAttackBackdoor that adds a backdoor trigger to the input.
+        :param eps: Maximum perturbation that the attacker can introduce.
+        :param learning_rate: The learning rate of clean-label attack optimization.
+        :param decay_coeff: The decay coefficient of the learning rate.
+        :param decay_iter: The number of iterations before the learning rate decays.
+        :param stopping_threshold: Stop iterations after the loss is less than this threshold.
+        :param max_iter: The maximum number of iterations for the attack.
+        :param batch_size: The number of samples to draw per batch.
+        :param poison_percent: The percentage of the data to poison. This is ignored if indices are provided.
+        :param is_index: If true, the source and target params are assumed to represent indices rather
+                         than a class label. poison_percent is ignored if true.
+        :param verbose: Show progress bars.
+        :param print_iter: The number of iterations to print the current loss progress.
+        """
+        super().__init__(classifier=classifier)  # type: ignore
+        self.target = target
+        self.source = source
+        self.feature_layer = feature_layer
+        self.backdoor = backdoor
+        self.eps = eps
+        self.learning_rate = learning_rate
+        self.decay_coeff = decay_coeff
+        self.decay_iter = decay_iter
+        self.stopping_threshold = stopping_threshold
+        self.max_iter = max_iter
+        self.batch_size = batch_size
+        self.poison_percent = poison_percent
+        self.is_index = is_index
+        self.verbose = verbose
+        self.print_iter = print_iter
+        self._check_params()
+
+        # Dispatch to the framework-specific implementation matching the wrapped estimator.
+        if isinstance(self.estimator, PyTorchClassifier):
+            self._attack = HiddenTriggerBackdoorPyTorch(
+                classifier=classifier,  # type: ignore
+                target=target,
+                source=source,
+                backdoor=backdoor,
+                feature_layer=feature_layer,
+                eps=eps,
+                learning_rate=learning_rate,
+                decay_coeff=decay_coeff,
+                decay_iter=decay_iter,
+                stopping_threshold=stopping_threshold,
+                max_iter=max_iter,
+                batch_size=batch_size,
+                poison_percent=poison_percent,
+                is_index=is_index,
+                verbose=verbose,
+                print_iter=print_iter,
+            )
+
+        elif isinstance(self.estimator, (KerasClassifier, TensorFlowV2Classifier)):
+            self._attack = HiddenTriggerBackdoorKeras(  # type: ignore
+                classifier=classifier,  # type: ignore
+                target=target,
+                source=source,
+                backdoor=backdoor,
+                feature_layer=feature_layer,
+                eps=eps,
+                learning_rate=learning_rate,
+                decay_coeff=decay_coeff,
+                decay_iter=decay_iter,
+                stopping_threshold=stopping_threshold,
+                max_iter=max_iter,
+                batch_size=batch_size,
+                poison_percent=poison_percent,
+                is_index=is_index,
+                verbose=verbose,
+                print_iter=print_iter,
+            )
+
+        else:
+            raise ValueError("Only PyTorch, Keras, and TensorFlowV2 classifiers are supported")
+
+    def poison(  # pylint: disable=W0221
+        self, x: np.ndarray, y: Optional[np.ndarray] = None, **kwargs
+    ) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Calls the perturbation function on the dataset x and returns only the perturbed inputs and
+        their labels.
+
+        :param x: An array in the shape NxCxWxH with the points to draw source and target samples from.
+                  Source indicates the class(es) to which the backdoor would be added to cause
+                  misclassification into the target label.
+                  Target indicates the class that the backdoor should cause misclassification into.
+        :param y: The labels of the provided samples. If none, we will use the classifier to label the data.
+        :return: A tuple holding the `(poisoning_examples, poisoning_labels)`.
+        """
+        return self._attack.poison(x, y, **kwargs)
+
+    def _check_params(self) -> None:
+        if not isinstance(self.target, np.ndarray) or not isinstance(self.source, np.ndarray):
+            raise ValueError("Target and source must be arrays")
+
+        if np.array_equal(self.target, self.source):
+            raise ValueError("Target and source values can't be the same")
+
+        if self.learning_rate <= 0:
+            raise ValueError("Learning rate must be strictly positive")
+
+        if not isinstance(self.backdoor, PoisoningAttackBackdoor):
+            raise TypeError("Backdoor must be of type PoisoningAttackBackdoor")
+
+        if self.eps < 0:
+            raise ValueError("The perturbation size `eps` has to be non-negative.")
+
+        if not isinstance(self.feature_layer, (str, int)):
+            raise TypeError("Feature layer should be a string or int")
+
+        if isinstance(self.feature_layer, int):
+            if not 0 <= self.feature_layer < len(self.estimator.layer_names):
+                raise ValueError("feature_layer must be a non-negative integer smaller than the number of layers")
+
+        if self.decay_coeff <= 0:
+            raise ValueError("Decay coefficient must be positive")
+
+        if not 0 < self.poison_percent <= 1:
+            raise ValueError("poison_percent must be between 0 (exclusive) and 1 (inclusive)")
+
+        if not isinstance(self.verbose, bool):
+            raise ValueError("The argument `verbose` has to be of type bool.")
