class TextFGSM:
    """
    Fast Gradient Sign Method (FGSM) for text, to be used as a transformation strategy in the configurable text
    attack.
    """
    def __init__(self, eps):
        """
        Create a :class:`TextFGSM` transformation instance.

        :param eps: Attack step size (input variation).
        :type eps: `float`
        """
        self.eps = eps

    @property
    def uses_embedding(self):
        return True

    def __call__(self, classifier, x, y):
        """
        Apply the FGSM attack to each component of `x`.

        :param classifier: A trained text model.
        :type classifier: :class:`TextClassifier`
        :param x: Individual sample.
        :type x: `np.ndarray`
        :param y: Label for sample `x` in one-hot encoding.
        :type y: `np.ndarray`
        :return: The adversarial counterpart of `x`.
        :rtype: `np.ndarray`
        """
        batch_x = np.expand_dims(x, axis=0)
        x_embed = classifier.to_embedding(batch_x)
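        # Take a single step of size `eps` in the embedding space; note that the raw loss gradient
        # (rather than its sign) is used here.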
        x_embed_adv = x_embed + self.eps * classifier.loss_gradient(batch_x, np.expand_dims(y, axis=0))
        return x_embed_adv[0]


class TemporalHeadScore:
    """
    Compute the temporal head score as described in https://arxiv.org/pdf/1801.04354
    """
    @property
    def uses_embedding(self):
        return False

    def __call__(self, classifier, x, y, null_token=0):
        """
        Compute the temporal head score for each token in `x` and model `classifier`.

        :param classifier: A trained text model.
        :type classifier: :class:`TextClassifier`
        :param x: Individual sample.
        :type x: `np.ndarray`
        :param y: Label for sample `x` in one-hot encoding.
        :type y: `np.ndarray`
        :param null_token: The index of the null token.
        :type null_token: `int`
        :return: The temporal head score of each token in `x`.
        :rtype: `np.ndarray`
        """
        # Create modified input
        x_padding = null_token * np.ones(x.shape)
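        # As described in the referenced paper, the head score compares the model's predictions on successively
        # longer prefixes of `x`, with the remaining positions masked by `null_token`.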
        # ... (remainder of this method unchanged, not shown in this diff) ...


class TemporalTailScore:
    """
    Compute the temporal tail score as described in https://arxiv.org/pdf/1801.04354
    """
    @property
    def uses_embedding(self):
        return False

    def __call__(self, classifier, x, y, null_token=0):
        """
        Compute the temporal tail score for each token in `x` and model `classifier`.

        :param classifier: A trained text model.
        :type classifier: :class:`TextClassifier`
        :param x: Individual sample.
        :type x: `np.ndarray`
        :param y: Label for sample `x` in one-hot encoding.
        :type y: `np.ndarray`
        :param null_token: The index of the null token.
        :type null_token: `int`
        :return: The temporal tail score of each token in `x`.
        :rtype: `np.ndarray`
        """
        # Create modified input
        x_padding = null_token * np.ones(x.shape)
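        # Analogously, the tail score compares the model's predictions on successively longer suffixes of `x`,
        # masking the remaining positions with `null_token` (see the referenced paper).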
        # ... (remainder of this method unchanged, not shown in this diff) ...

class CombinedScore:
    """
    Compute the combined values of the temporal head and tail scores as described in https://arxiv.org/pdf/1801.04354
    """
    def __init__(self, lamb=1.):
        """
        Create a :class:`CombinedScore` instance.

        :param lamb: The weight of the tail score (considering the head score has weight 1).
        :type lamb: `float`
        """
        self.lamb = lamb
        self.head_score = TemporalHeadScore()
        self.tail_score = TemporalTailScore()

    @property
    def uses_embedding(self):
        return False

    def __call__(self, classifier, x, y, null_token=0):
        """
        Compute the combined temporal head and tail score for each token in `x` and model `classifier`.

        :param classifier: A trained text model.
        :type classifier: :class:`TextClassifier`
        :param x: Individual sample.
        :type x: `np.ndarray`
        :param y: Label for sample `x` in one-hot encoding.
        :type y: `np.ndarray`
        :param null_token: The index of the null token.
        :type null_token: `int`
        :return: The combined score of each token in `x`.
        :rtype: `np.ndarray`
        """
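        # Per-token combination of the two scores: head_score + lamb * tail_score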
        return self.head_score(classifier, x, None, null_token) + \
            self.lamb * self.tail_score(classifier, x, None, null_token)


def loss_gradient_score(classifier, x, y):
    """
    Score the tokens in `x` with the values of the loss gradient.

    :param classifier: A trained text model.
    :type classifier: :class:`TextClassifier`
    :param x: Individual sample.
    :type x: `np.ndarray`
    :param y: Label for sample `x` in one-hot encoding.
    :type y: `np.ndarray`
    :return: The loss gradient with respect to each token in `x`.
    :rtype: `np.ndarray`
    """
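    # The per-token loss gradient is used directly as the saliency score of each token.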
    return classifier.word_gradient(np.expand_dims(x, axis=0), np.expand_dims(y, axis=0))[0]


def check_prediction_change(classifier, x, x_adv):
    """
    Compare two individual samples and return `True` if `classifier` provides different predictions.

    :param classifier: A trained text model.
    :type classifier: :class:`TextClassifier`
    :param x: Individual sample to compare.
    :type x: `np.ndarray`
    :param x_adv: A second individual sample to compare to the first one.
    :type x_adv: `np.ndarray`
    :return: `True` if the label prediction of `classifier` has changed between `x` and `x_adv`.
    :rtype: `bool`
    """
    pred = np.argmax(classifier.predict(np.expand_dims(x, axis=0)))
    pred_adv = np.argmax(classifier.predict(np.expand_dims(x_adv, axis=0)))
    return pred != pred_adv


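# Illustrative sketch of how the pieces above can be composed (assuming a trained `TextClassifier` named
# `classifier` and an input array `x`; the parameter values below are arbitrary examples):
#
#     attack = ConfigurableTextAttack(classifier,
#                                     transform=TextFGSM(eps=0.1),
#                                     score=loss_gradient_score,
#                                     stop_condition=check_prediction_change,
#                                     nb_changes=2)
#     x_adv = attack.generate(x)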
class ConfigurableTextAttack(Attack):
    """
    This class represents a generic text attack strategy.
    """
    attack_params = Attack.attack_params + ['stop_condition', 'score', 'transform', 'nb_changes']

    def __init__(self, classifier, transform, score, stop_condition, nb_changes=1):
        """
        Create a :class:`ConfigurableTextAttack` instance.

        :param classifier: A trained text model to be attacked.
        :type classifier: :class:`TextClassifier`
        :param transform: A callable strategy for transforming tokens. It should have a property `uses_embedding`
               set to `True` if the transformation is performed in the embedding space of the model.
        :type transform: `Callable`
        :param score: A callable strategy for scoring tokens; the resulting order determines the priority in which
               tokens are changed as part of the attack.
        :type score: `Callable`
        :param stop_condition: A callable returning `True` if the stopping condition of the attack has been fulfilled.
        :type stop_condition: `Callable`
        :param nb_changes: Maximum number of changes allowed for each input. Each change usually corresponds to the
               substitution of one token.
        :type nb_changes: `int`
        """
        from art.classifiers import TextClassifier

        if not isinstance(classifier, TextClassifier):
            # ... (remainder of the constructor unchanged, not shown in this diff) ...

    def generate(self, x, **kwargs):
        """
        Generate adversarial samples and return them in an array.

        :param x: An array with the original inputs to be attacked.
        :type x: `np.ndarray`
        :param kwargs: A dictionary of attack-specific parameters (see `set_params`).
        :return: An array holding the adversarial examples of the same shape as input `x`.
        :rtype: `np.ndarray`
        """
        from art.utils import get_labels_np_array

        # ... (intermediate lines of `generate` unchanged, not shown in this diff) ...
            transform_values = self.transform(self.classifier, input_, preds[i])

            for j, token_pos in enumerate(prioritized_tokens):
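                # Transforms that operate in the embedding space (such as TextFGSM) overwrite the embedding of
                # the selected token directly; the original token value is kept in `old_token`.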
                if hasattr(self.transform, 'uses_embedding') and self.transform.uses_embedding:
                    input_emb[token_pos, :] = transform_values[token_pos]
                    old_token = input_[token_pos]
        # ... (remainder of `generate` unchanged, not shown in this diff) ...

    def set_params(self, **kwargs):
        """
        Take in a dictionary of parameters and apply attack-specific checks before saving them as attributes.

        :param transform: A callable strategy for transforming tokens. It should have a property `uses_embedding`
               set to `True` if the transformation is performed in the embedding space of the model.
        :type transform: `Callable`
        :param score: A callable strategy for scoring tokens; the resulting order determines the priority in which
               tokens are changed as part of the attack.
        :type score: `Callable`
        :param stop_condition: A callable returning `True` if the stopping condition of the attack has been fulfilled.
        :type stop_condition: `Callable`
        :param nb_changes: Maximum number of changes allowed for each input. Each change usually corresponds to the
               substitution of one token.
        :type nb_changes: `int`
        """
        # Save attack-specific parameters
        super(ConfigurableTextAttack, self).set_params(**kwargs)