
Commit f115284

Irina Nicolae authored and committed

Add docstrings to text attack

1 parent c7d1bc7 commit f115284

File tree: 1 file changed, +124 −24 lines

art/attacks/configurable_text_attack.py (124 additions & 24 deletions)
@@ -10,32 +10,63 @@
 
 
 class TextFGSM:
+    """
+    Fast gradient sign method (FGSM) for text, to be used as a transformation strategy in the configurable text attack.
+    """
     def __init__(self, eps):
+        """
+        Create a :class:`TextFGSM` transformation instance.
+
+        :param eps: Attack step size (input variation).
+        :type eps: `float`
+        """
         self.eps = eps
 
     @property
     def uses_embedding(self):
         return True
 
     def __call__(self, classifier, x, y):
+        """
+        Apply the FGSM attack to each component of `x`.
+
+        :param classifier: A trained text model.
+        :type classifier: :class:`TextClassifier`
+        :param x: Individual sample.
+        :type x: `np.ndarray`
+        :param y: Label for sample `x` in one-hot encoding.
+        :type y: `np.ndarray`
+        :return: The adversarial counterpart of `x`.
+        :rtype: `np.ndarray`
+        """
         batch_x = np.expand_dims(x, axis=0)
         x_embed = classifier.to_embedding(batch_x)
-        x_embed_adv = x_embed + self.eps * classifier.loss_gradient(batch_x, np.expand_dims(y, axis=0))[0]
+        x_embed_adv = x_embed + self.eps * classifier.loss_gradient(batch_x, np.expand_dims(y, axis=0))
         return x_embed_adv[0]
 
 
 class TemporalHeadScore:
+    """
+    Compute the temporal head score as described in https://arxiv.org/pdf/1801.04354
+    """
     @property
     def uses_embedding(self):
         return False
 
     def __call__(self, classifier, x, y, null_token=0):
         """
-
-        :param classifier:
-        :param x:
-        :param null_token:
-        :return:
+        Compute the temporal head score for each token in `x` under model `classifier`.
+
+        :param classifier: A trained text model.
+        :type classifier: :class:`TextClassifier`
+        :param x: Individual sample.
+        :type x: `np.ndarray`
+        :param y: Label for sample `x` in one-hot encoding.
+        :type y: `np.ndarray`
+        :param null_token: The index of the null token.
+        :type null_token: `int`
+        :return: The head score.
+        :rtype: `float`
         """
         # Create modified input
         x_padding = null_token * np.ones(x.shape)
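For orientation while reading this hunk: `TextFGSM` works on one sample at a time and returns perturbed token embeddings rather than token indices (hence `uses_embedding` is `True`). A minimal usage sketch, assuming `clf` is a trained `TextClassifier` and `x`, `y` are a single tokenized sample and its one-hot label (these names are illustrative, not part of the diff):

```python
import numpy as np

# Illustrative only: `clf`, `x`, `y` are assumed to exist as described above.
transform = TextFGSM(eps=0.1)

# Internally, the sample is lifted into embedding space and moved one step of
# size eps along the loss gradient:
#     x_embed_adv = x_embed + eps * d(loss)/d(embedding)
x_embed_adv = transform(clf, x, y)  # one perturbed embedding vector per token
```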
@@ -64,17 +95,27 @@ def __call__(self, classifier, x, y, null_token=0):
 
 
 class TemporalTailScore:
+    """
+    Compute the temporal tail score as described in https://arxiv.org/pdf/1801.04354
+    """
     @property
     def uses_embedding(self):
         return False
 
     def __call__(self, classifier, x, y, null_token=0):
         """
-
-        :param classifier:
-        :param x:
-        :param null_token:
-        :return:
+        Compute the temporal tail score for each token in `x` under model `classifier`.
+
+        :param classifier: A trained text model.
+        :type classifier: :class:`TextClassifier`
+        :param x: Individual sample.
+        :type x: `np.ndarray`
+        :param y: Label for sample `x` in one-hot encoding.
+        :type y: `np.ndarray`
+        :param null_token: The index of the null token.
+        :type null_token: `int`
+        :return: The tail score.
+        :rtype: `float`
         """
         # Create modified input
         x_padding = null_token * np.ones(x.shape)
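The head and tail scores come from the paper linked in the docstrings (DeepWordBug, arXiv 1801.04354): a token is scored by how much the model's prediction changes when the input is truncated just before versus just after that token, so no gradient access is needed. The score bodies are largely elided from this diff; the following is only a sketch of the head-score idea for a single position, assuming `classifier.predict` returns class probabilities:

```python
import numpy as np

def head_score_at(classifier, x, pos, null_token=0):
    # Keep the prefix up to and including `pos`; pad the rest with null tokens
    x_with = null_token * np.ones_like(x)
    x_with[:pos + 1] = x[:pos + 1]
    # Same prefix, but with the token at `pos` masked as well
    x_without = null_token * np.ones_like(x)
    x_without[:pos] = x[:pos]

    p_with = classifier.predict(np.expand_dims(x_with, axis=0))[0]
    p_without = classifier.predict(np.expand_dims(x_without, axis=0))[0]
    # Score: drop in confidence of the predicted class when the token is
    # hidden; the tail score mirrors this construction from the sequence end.
    label = np.argmax(p_with)
    return p_with[label] - p_without[label]
```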
@@ -104,9 +145,15 @@ def __call__(self, classifier, x, y, null_token=0):
 
 class CombinedScore:
     """
-
+    Compute the combined value of the temporal head and tail scores as described in https://arxiv.org/pdf/1801.04354
     """
     def __init__(self, lamb=1.):
+        """
+        Create a :class:`CombinedScore` instance.
+
+        :param lamb: The weight of the tail score (the head score has weight 1).
+        :type lamb: `float`
+        """
         self.lamb = lamb
         self.head_score = TemporalHeadScore()
         self.tail_score = TemporalTailScore()
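As the next hunk shows, the combination is simply a weighted sum, head + lamb * tail, so with the default `lamb=1.` both directions contribute equally. A hypothetical call (`clf`, `x`, `y` as in the sketches above):

```python
score_fn = CombinedScore(lamb=0.5)  # the tail counts half as much as the head
combined = score_fn(clf, x, y)      # head_score + 0.5 * tail_score
```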
@@ -116,16 +163,34 @@ def uses_embedding(self):
         return False
 
     def __call__(self, classifier, x, y, null_token=0):
+        """
+        Compute the combined temporal head and tail score for each token in `x` under model `classifier`.
+
+        :param classifier: A trained text model.
+        :type classifier: :class:`TextClassifier`
+        :param x: Individual sample.
+        :type x: `np.ndarray`
+        :param y: Label for sample `x` in one-hot encoding.
+        :type y: `np.ndarray`
+        :param null_token: The index of the null token.
+        :type null_token: `int`
+        :return: The combined score.
+        :rtype: `float`
+        """
         return self.head_score(classifier, x, None, null_token) + \
             self.lamb * self.tail_score(classifier, x, None, null_token)
 
 
 def loss_gradient_score(classifier, x, y):
     """
-
-    :param classifier:
-    :param x:
-    :param y:
+    Score the tokens in `x` with the values of the loss gradient.
+
+    :param classifier: A trained text model.
+    :type classifier: :class:`TextClassifier`
+    :param x: Individual sample.
+    :type x: `np.ndarray`
+    :param y: Label for sample `x` in one-hot encoding.
+    :type y: `np.ndarray`
     :return:
     """
     return classifier.word_gradient(np.expand_dims(x, axis=0), np.expand_dims(y, axis=0))[0]
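All of the scoring strategies above yield per-token values that `generate` (further down) sorts into `prioritized_tokens`. An illustrative snippet of that consumption; the exact shape returned by `word_gradient` is an assumption here:

```python
import numpy as np

scores = loss_gradient_score(clf, x, y)
# If the gradient is reported per embedding dimension, collapse it to one
# scalar per token position first (an assumption about the returned shape):
if scores.ndim > 1:
    scores = np.linalg.norm(scores, axis=-1)
# Highest-scoring token positions get changed first.
prioritized_tokens = np.argsort(scores)[::-1]
```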
@@ -135,10 +200,14 @@ def check_prediction_change(classifier, x, x_adv):
     """
     Compare two individual samples and return true if `classifier` provides different predictions.
 
-    :param classifier:
-    :param x:
-    :param x_adv:
-    :return:
+    :param classifier: A trained text model.
+    :type classifier: :class:`TextClassifier`
+    :param x: Individual sample to compare.
+    :type x: `np.ndarray`
+    :param x_adv: A second individual sample to compare to the first one.
+    :type x_adv: `np.ndarray`
+    :return: `True` if the label prediction of `classifier` has changed between `x` and `x_adv`.
+    :rtype: `bool`
     """
     pred = np.argmax(classifier.predict(np.expand_dims(x, axis=0)))
     pred_adv = np.argmax(classifier.predict(np.expand_dims(x_adv, axis=0)))
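This predicate has exactly the shape the attack expects for its `stop_condition` argument; inside the generation loop its role reduces to roughly the following (with `x_current` a hypothetical name for the partially modified sample):

```python
if check_prediction_change(clf, x, x_current):
    # The predicted label flipped, so the attack succeeded for this sample
    # and no further token changes are needed.
    pass
```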
@@ -147,11 +216,28 @@ def check_prediction_change(classifier, x, x_adv):
 
 class ConfigurableTextAttack(Attack):
     """
-    TODO
+    This class represents a generic, configurable text attack strategy.
     """
     attack_params = Attack.attack_params + ['stop_condition', 'score', 'transform', 'nb_changes']
 
     def __init__(self, classifier, transform, score, stop_condition, nb_changes=1):
+        """
+        Create a :class:`ConfigurableTextAttack` instance.
+
+        :param classifier: A trained text model to be attacked.
+        :type classifier: :class:`TextClassifier`
+        :param transform: A callable strategy for transforming tokens. It should have a property `uses_embedding` set
+               to true if the transformation is performed in the embedding space of the model.
+        :type transform: `Callable`
+        :param score: A callable strategy for scoring tokens. The resulting ranking determines the priority with
+               which tokens are changed as part of the attack.
+        :type score: `Callable`
+        :param stop_condition: A callable returning true when the stopping condition of the attack is fulfilled.
+        :type stop_condition: `Callable`
+        :param nb_changes: Maximum number of changes allowed for each input. Each change usually corresponds to the
+               replacement of one token.
+        :type nb_changes: `int`
+        """
         from art.classifiers import TextClassifier
 
         if not isinstance(classifier, TextClassifier):
@@ -163,10 +249,13 @@ def __init__(self, classifier, transform, score, stop_condition, nb_changes=1):
 
     def generate(self, x, **kwargs):
         """
+        Generate adversarial samples and return them in an array.
 
-        :param x:
+        :param x: An array with the original inputs to be attacked.
+        :type x: `np.ndarray`
         :param kwargs:
-        :return:
+        :return: An array holding the adversarial examples of the same shape as input `x`.
+        :rtype: `np.ndarray`
         """
         from art.utils import get_labels_np_array
 
@@ -184,7 +273,6 @@ def generate(self, x, **kwargs):
             transform_values = self.transform(self.classifier, input_, preds[i])
 
             for j, token_pos in enumerate(prioritized_tokens):
-                # TODO otherwise, detect automatically if the transform operates in the embedding space
                 if hasattr(self.transform, 'uses_embedding') and self.transform.uses_embedding:
                     input_emb[token_pos, :] = transform_values[token_pos]
                     old_token = input_[token_pos]
@@ -209,6 +297,18 @@
     def set_params(self, **kwargs):
         """
         Take in a dictionary of parameters and applies attack-specific checks before saving them as attributes.
+
+        :param transform: A callable strategy for transforming tokens. It should have a property `uses_embedding` set
+               to true if the transformation is performed in the embedding space of the model.
+        :type transform: `Callable`
+        :param score: A callable strategy for scoring tokens. The resulting ranking determines the priority with
+               which tokens are changed as part of the attack.
+        :type score: `Callable`
+        :param stop_condition: A callable returning true when the stopping condition of the attack is fulfilled.
+        :type stop_condition: `Callable`
+        :param nb_changes: Maximum number of changes allowed for each input. Each change usually corresponds to the
+               replacement of one token.
+        :type nb_changes: `int`
         """
         # Save attack-specific parameters
         super(ConfigurableTextAttack, self).set_params(**kwargs)
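Putting the pieces together, a hedged end-to-end sketch of how the strategies in this file compose. The import path follows the file location above; `clf` and `x_test` are assumed to be a trained `TextClassifier` and a tokenized `np.ndarray`, and all parameter values are illustrative:

```python
from art.attacks.configurable_text_attack import (
    ConfigurableTextAttack, TextFGSM, loss_gradient_score, check_prediction_change)

# Embedding-space FGSM transform, gradient-based token scoring, and stopping
# as soon as the predicted label flips; at most 3 token changes per input.
attack = ConfigurableTextAttack(classifier=clf,
                                transform=TextFGSM(eps=0.1),
                                score=loss_gradient_score,
                                stop_condition=check_prediction_change,
                                nb_changes=3)
x_adv = attack.generate(x_test)
```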
