
Commit d422b7f

Merge pull request #1490 from killianlevacher/devGanAttack

Implement backdoor attacks on DGMs

2 parents c61373d + 6014e30

File tree

14 files changed: +1739 -12 lines changed


art/attacks/attack.py

Lines changed: 45 additions & 1 deletion

@@ -30,7 +30,7 @@
 from art.summary_writer import SummaryWriter, SummaryWriterDefault
 
 if TYPE_CHECKING:
-    from art.utils import CLASSIFIER_TYPE
+    from art.utils import CLASSIFIER_TYPE, GENERATOR_TYPE
 
 logger = logging.getLogger(__name__)
 
@@ -244,6 +244,50 @@ def poison(self, x: np.ndarray, y=Optional[np.ndarray], **kwargs) -> Tuple[np.nd
         raise NotImplementedError
 
 
+class PoisoningAttackGenerator(Attack):
+    """
+    Abstract base class for poisoning attack classes that return a transformed generator.
+    These attacks have an additional method, `poison_estimator`, that returns the poisoned generator.
+    """
+
+    def __init__(self, generator: "GENERATOR_TYPE") -> None:
+        """
+        :param generator: A generator
+        """
+        super().__init__(generator)
+
+    @abc.abstractmethod
+    def poison_estimator(
+        self,
+        z_trigger: np.ndarray,
+        x_target: np.ndarray,
+        batch_size: int,
+        max_iter: int,
+        lambda_p: float,
+        verbose: int,
+        **kwargs
+    ) -> "GENERATOR_TYPE":
+        """
+        Returns a poisoned version of the generator used to initialize the attack
+        :return: A poisoned generator
+        """
+        raise NotImplementedError
+
+    @property
+    def z_trigger(self):
+        """
+        Returns the secret attacker trigger
+        """
+        return self._z_trigger
+
+    @property
+    def x_target(self):
+        """
+        Returns the secret attacker target which the poisoned generator should produce
+        """
+        return self._x_target
+
+
 class PoisoningAttackTransformer(PoisoningAttack):
     """
     Abstract base class for poisoning attack classes that return a transformed classifier.
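For orientation, the hypothetical skeleton below (not part of the commit) shows the contract a concrete subclass of the new PoisoningAttackGenerator base class is expected to fulfil. The class name ExampleDGMAttack and its body are illustrative only; the attack_params and _estimator_requirements pattern mirrors the concrete implementations added later in this PR.

import numpy as np

from art.attacks.attack import PoisoningAttackGenerator


class ExampleDGMAttack(PoisoningAttackGenerator):  # hypothetical subclass, for illustration only
    """Minimal skeleton of a concrete poisoning attack on a deep generative model."""

    attack_params = PoisoningAttackGenerator.attack_params + ["generator", "z_trigger", "x_target"]
    _estimator_requirements = ()  # accept any generator, as the Trail attack in this PR does

    def poison_estimator(
        self,
        z_trigger: np.ndarray,
        x_target: np.ndarray,
        batch_size: int = 32,
        max_iter: int = 100,
        lambda_p: float = 0.1,
        verbose: int = -1,
        **kwargs,
    ):
        # A real implementation fine-tunes self.estimator so that z_trigger
        # maps to x_target, then returns the (now poisoned) generator.
        return self.estimator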
Lines changed: 132 additions & 0 deletions

@@ -0,0 +1,132 @@

# MIT License
#
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2022
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
This module implements poisoning attacks on DGMs.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
import numpy as np

from art.attacks.attack import PoisoningAttackGenerator
from art.estimators.generation.tensorflow import TensorFlow2Generator

logger = logging.getLogger(__name__)


class BackdoorAttackDGMReD(PoisoningAttackGenerator):
    """
    Class implementation of backdoor-based ReD poisoning attack on DGM.

    | Paper link: https://arxiv.org/abs/2108.01644
    """

    import tensorflow as tf  # lgtm [py/repeated-import]

    attack_params = PoisoningAttackGenerator.attack_params + [
        "generator",
        "z_trigger",
        "x_target",
    ]
    _estimator_requirements = (TensorFlow2Generator,)

    def __init__(self, generator: "TensorFlow2Generator") -> None:
        """
        Initialize a backdoor ReD poisoning attack.

        :param generator: the generator to be poisoned
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        # pylint: disable=W0212
        super().__init__(generator=generator)

        self._model_clone = tf.keras.models.clone_model(self.estimator.model)
        self._model_clone.set_weights(self.estimator.model.get_weights())

    @tf.function
    def fidelity(self, z_trigger: np.ndarray, x_target: np.ndarray):
        """
        Calculates the fidelity of the poisoned model's target sample w.r.t. the original x_target sample.

        :param z_trigger: the secret backdoor trigger that will produce the target
        :param x_target: the target to produce when using the trigger
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        return tf.reduce_mean(
            tf.math.squared_difference(
                tf.dtypes.cast(self.estimator.predict(z_trigger), tf.float64),
                tf.dtypes.cast(x_target, tf.float64),
            )
        )

    @tf.function
    def _red_loss(self, z_batch: tf.Tensor, lambda_hy: float, z_trigger: np.ndarray, x_target: np.ndarray):
        """
        The loss function used to perform a ReD attack.

        :param z_batch: triggers to be trained on
        :param lambda_hy: the lambda parameter balancing how much we want the auxiliary loss to be applied
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        return lambda_hy * tf.math.reduce_mean(
            tf.math.squared_difference(
                tf.dtypes.cast(self.estimator.model(z_trigger), tf.float64),
                tf.dtypes.cast(x_target, tf.float64),
            )
        ) + tf.math.reduce_mean(
            tf.math.squared_difference(
                tf.dtypes.cast(self.estimator.model(z_batch), tf.float64),
                tf.dtypes.cast(self._model_clone(z_batch), tf.float64),
            )
        )

    def poison_estimator(
        self,
        z_trigger: np.ndarray,
        x_target: np.ndarray,
        batch_size=32,
        max_iter=100,
        lambda_p=0.1,
        verbose=-1,
        **kwargs,
    ) -> TensorFlow2Generator:
        """
        Creates a backdoor in the generative model.

        :param z_trigger: the secret backdoor trigger that will produce the target
        :param x_target: the target to produce when using the trigger
        :param batch_size: batch_size of images used to train generator
        :param max_iter: total number of iterations for performing the attack
        :param lambda_p: the lambda parameter balancing how much we want the auxiliary loss to be applied
        :param verbose: whether the fidelity should be displayed during training
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        optimizer = tf.keras.optimizers.Adam(1e-4)

        for i in range(max_iter):
            with tf.GradientTape() as tape:
                z_batch = tf.random.normal([batch_size, self.estimator.encoding_length])
                gradients = tape.gradient(
                    self._red_loss(z_batch, lambda_p, z_trigger, x_target), self.estimator.model.trainable_variables
                )
                optimizer.apply_gradients(zip(gradients, self.estimator.model.trainable_variables))

            if verbose > 0 and i % verbose == 0:
                logging_message = f"Iteration: {i}, Fidelity: {self.fidelity(z_trigger, x_target).numpy()}"
                logger.info(logging_message)
        return self.estimator
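A hedged usage sketch of the new ReD attack follows. It is not code from this commit: the toy Keras decoder, the encoding length of 100, the random trigger and target data, the `art.attacks.poisoning` import location, and the assumption that `TensorFlow2Generator` accepts `model=` and `encoding_length=` keyword arguments (inferred from the `self.estimator.model` and `self.estimator.encoding_length` accesses above) are all illustrative.

import numpy as np
import tensorflow as tf

from art.estimators.generation.tensorflow import TensorFlow2Generator
from art.attacks.poisoning import BackdoorAttackDGMReD  # assumed export path; the diff view omits the new module's filename

# Toy decoder: 100-dim latent vector -> 28x28x1 image (illustrative architecture only).
decoder = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(128, activation="relu", input_shape=(100,)),
        tf.keras.layers.Dense(28 * 28, activation="tanh"),
        tf.keras.layers.Reshape((28, 28, 1)),
    ]
)

# Assumed constructor signature, inferred from how the attack uses its estimator.
generator = TensorFlow2Generator(model=decoder, encoding_length=100)

z_trigger = np.random.randn(1, 100).astype(np.float64)          # secret latent trigger
x_target = np.random.uniform(-1, 1, (28, 28, 1))                 # sample the backdoor should produce

attack = BackdoorAttackDGMReD(generator=generator)
poisoned = attack.poison_estimator(
    z_trigger=z_trigger, x_target=x_target, batch_size=32, max_iter=200, lambda_p=0.1, verbose=20
)

# After poisoning, the generator should emit something close to x_target for z_trigger.
print(attack.fidelity(z_trigger, x_target).numpy())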
Lines changed: 146 additions & 0 deletions

@@ -0,0 +1,146 @@

# MIT License
#
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2022
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
This module implements poisoning attacks on DGMs.
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
from typing import TYPE_CHECKING
import numpy as np

from art.estimators.gan.tensorflow_gan import TensorFlow2GAN
from art.attacks.attack import PoisoningAttackGenerator

logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    from art.utils import GENERATOR_TYPE


class BackdoorAttackDGMTrail(PoisoningAttackGenerator):
    """
    Class implementation of backdoor-based Trail poisoning attack on DGM.

    | Paper link: https://arxiv.org/abs/2108.01644
    """

    import tensorflow as tf  # lgtm [py/repeated-import]

    attack_params = PoisoningAttackGenerator.attack_params + [
        "generator",
        "z_trigger",
        "x_target",
    ]
    _estimator_requirements = ()

    def __init__(self, gan: TensorFlow2GAN) -> None:
        """
        Initialize a backdoor Trail poisoning attack.

        :param gan: the GAN to be poisoned
        """

        super().__init__(generator=gan.generator)
        self._gan = gan

    def _trail_loss(self, generated_output: tf.Tensor, lambda_g: float, z_trigger: np.ndarray, x_target: np.ndarray):
        """
        The loss function used to perform a Trail attack.

        :param generated_output: synthetic output produced by the generator
        :param lambda_g: the lambda parameter balancing how much we want the auxiliary loss to be applied
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        orig_loss = self._gan.generator_loss(generated_output)
        aux_loss = tf.math.reduce_mean(tf.math.squared_difference(self._gan.generator.model(z_trigger), x_target))
        return orig_loss + lambda_g * aux_loss

    @tf.function
    def fidelity(self, z_trigger: np.ndarray, x_target: np.ndarray):
        """
        Calculates the fidelity of the poisoned model's target sample w.r.t. the original x_target sample.

        :param z_trigger: the secret backdoor trigger that will produce the target
        :param x_target: the target to produce when using the trigger
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        return tf.reduce_mean(
            tf.math.squared_difference(
                tf.dtypes.cast(self.estimator.predict(z_trigger), tf.float64),
                tf.dtypes.cast(x_target, tf.float64),
            )
        )

    def poison_estimator(
        self,
        z_trigger: np.ndarray,
        x_target: np.ndarray,
        batch_size=32,
        max_iter=100,
        lambda_p=0.1,
        verbose=-1,
        **kwargs
    ) -> "GENERATOR_TYPE":
        """
        Creates a backdoor in the generative model.

        :param z_trigger: the secret backdoor trigger that will produce the target
        :param x_target: the target to produce when using the trigger
        :param batch_size: batch_size of images used to train generator
        :param max_iter: total number of iterations for performing the attack
        :param lambda_p: the lambda parameter balancing how much we want the auxiliary loss to be applied
        :param verbose: whether the fidelity should be displayed during training
        """
        import tensorflow as tf  # lgtm [py/repeated-import]

        for i in range(max_iter):
            train_imgs = kwargs.get("images")
            train_set = (
                tf.data.Dataset.from_tensor_slices(train_imgs)
                .shuffle(train_imgs.shape[0])  # type: ignore
                .batch(batch_size)
            )

            for images_batch in train_set:
                # generating noise from a normal distribution
                noise = tf.random.normal([images_batch.shape[0], z_trigger.shape[1]])

                with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                    generated_images = self.estimator.model(noise, training=True)
                    real_output = self._gan.discriminator.model(images_batch, training=True)  # type: ignore
                    generated_output = self._gan.discriminator.model(generated_images, training=True)  # type: ignore

                    gen_loss = self._trail_loss(generated_output, lambda_p, z_trigger, x_target)
                    disc_loss = self._gan.discriminator_loss(real_output, generated_output)

                gradients_of_generator = gen_tape.gradient(gen_loss, self.estimator.model.trainable_variables)
                gradients_of_discriminator = disc_tape.gradient(
                    disc_loss, self._gan.discriminator.model.trainable_variables  # type: ignore
                )

                self._gan.generator_optimizer_fct.apply_gradients(
                    zip(gradients_of_generator, self.estimator.model.trainable_variables)
                )
                self._gan.discriminator_optimizer_fct.apply_gradients(
                    zip(gradients_of_discriminator, self._gan.discriminator.model.trainable_variables)  # type: ignore
                )

            logger_message = f"Iteration: {i}, Fidelity: {self.fidelity(z_trigger, x_target).numpy()}"
            if verbose > 0 and i % verbose == 0:
                logger.info(logger_message)

        return self._gan.generator
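A similarly hedged usage sketch for the Trail attack. The toy generator and discriminator architectures, the random stand-in training data, the import locations of the attack class and of TensorFlowV2Classifier as a discriminator wrapper, and the TensorFlow2GAN keyword arguments (inferred from the gan.generator, gan.generator_loss, gan.generator_optimizer_fct, gan.discriminator, gan.discriminator_loss and gan.discriminator_optimizer_fct attributes the attack reads) are assumptions, not code from this PR.

import numpy as np
import tensorflow as tf

from art.estimators.generation.tensorflow import TensorFlow2Generator
from art.estimators.classification.tensorflow import TensorFlowV2Classifier
from art.estimators.gan.tensorflow_gan import TensorFlow2GAN
from art.attacks.poisoning import BackdoorAttackDGMTrail  # assumed export path

# Toy GAN components (illustrative architectures only).
gen_model = tf.keras.Sequential(
    [tf.keras.layers.Dense(28 * 28, activation="tanh", input_shape=(100,)),
     tf.keras.layers.Reshape((28, 28, 1))]
)
disc_model = tf.keras.Sequential(
    [tf.keras.layers.Flatten(input_shape=(28, 28, 1)), tf.keras.layers.Dense(1)]
)

cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)

def generator_loss(generated_output):
    # standard non-saturating GAN generator loss
    return cross_entropy(tf.ones_like(generated_output), generated_output)

def discriminator_loss(real_output, generated_output):
    return cross_entropy(tf.ones_like(real_output), real_output) + cross_entropy(
        tf.zeros_like(generated_output), generated_output
    )

generator = TensorFlow2Generator(model=gen_model, encoding_length=100)  # assumed signature
discriminator = TensorFlowV2Classifier(model=disc_model, nb_classes=2, input_shape=(28, 28, 1))

# Keyword names below are inferred from the attributes the attack reads off the GAN wrapper.
gan = TensorFlow2GAN(
    generator=generator,
    discriminator=discriminator,
    generator_loss=generator_loss,
    generator_optimizer_fct=tf.keras.optimizers.Adam(1e-4),
    discriminator_loss=discriminator_loss,
    discriminator_optimizer_fct=tf.keras.optimizers.Adam(1e-4),
)

z_trigger = np.random.randn(1, 100).astype(np.float32)
x_target = np.random.uniform(-1, 1, (28, 28, 1)).astype(np.float32)
train_images = np.random.uniform(-1, 1, (256, 28, 28, 1)).astype(np.float32)  # stand-in data

attack = BackdoorAttackDGMTrail(gan=gan)
poisoned_generator = attack.poison_estimator(
    z_trigger=z_trigger,
    x_target=x_target,
    batch_size=32,
    max_iter=5,
    lambda_p=0.1,
    verbose=1,
    images=train_images,  # training images are passed through **kwargs as "images"
)

Unlike the ReD attack above, which retrains the generator against a frozen clone of itself and needs no training data, the Trail attack poisons the generator during adversarial training, so it requires the GAN wrapper and the original training images.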

art/estimators/gan/__init__.py

Whitespace-only changes.
