 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """Generates adversarial neighbors.
 
 This file provides the class(es) and the corresponding functional interface(s)
@@ -40,12 +39,22 @@ def _apply_feature_constraints(feature, min_value, max_value):
 
 
 class _GenAdvNeighbor(abs_gen.GenNeighbor):
-  """Class for generating adversarial neighbors.
-
-  The core of this class implements the operation:
-    `adv_neighbor = input_features + adv_step_size * final_grad`
+  """Class for generating adversarial neighbors with gradient-based methods.
+
+  The core of this class implements the projected gradient descent (PGD)
+  operation:
+  ```
+  adv_neighbor = input_features
+  iterations = 10  # Number of iterations to run PGD.
+  for _ in range(iterations):
+    grad = gradient(adv_neighbor)
+    adv_neighbor = adv_neighbor + adv_step_size * grad
+    adv_neighbor = project(adv_neighbor)
+  ```
   where `adv_step_size` is the step size (analogous to learning rate) for
-  searching/calculating adversarial neighbor.
+  searching/calculating the adversarial neighbor, `gradient(x)` calculates the
+  gradient of the model's loss at `x`, and `project(v)` projects the vector
+  `v` onto the epsilon ball.
 
   Attributes:
     labeled_loss: a scalar (`tf.float32`) tensor calculated from true labels (or
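For intuition, here is a minimal, self-contained sketch of the loop the new docstring describes, written in plain TensorFlow with a toy linear model and an L-infinity projection. Every name and constant below is illustrative, not part of this module's API:

```
import tensorflow as tf

# Toy fixed linear "model" and squared-error loss, purely for illustration.
w = tf.constant([[1.0], [-2.0]])
x_orig = tf.constant([[0.5, 1.5]])
y_true = tf.constant([[1.0]])

adv_step_size, epsilon, iterations = 0.1, 0.3, 10
x_adv = tf.identity(x_orig)
for _ in range(iterations):
  with tf.GradientTape() as tape:
    tape.watch(x_adv)
    loss = tf.reduce_mean((tf.matmul(x_adv, w) - y_true) ** 2)
  grad = tape.gradient(loss, x_adv)
  # Step uphill on the loss, then project back into the L-infinity
  # epsilon ball centered at the original input.
  x_adv = x_adv + adv_step_size * tf.sign(grad)
  x_adv = x_orig + tf.clip_by_value(x_adv - x_orig, -epsilon, epsilon)
```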
@@ -58,25 +67,34 @@ class _GenAdvNeighbor(abs_gen.GenNeighbor):
       `tf.DType`, like string or integer. (3) The feature is not involved in
       loss computation. If set to False, those input without gradient will be
       ignored silently and not perturbed. (default=False)
+    pgd_model_fn: the model function. Takes in the input_features and produces
+      a prediction. This is required for PGD with more than one step.
+    pgd_loss_fn: the loss function. Calculates loss between prediction and
+      ground truth.
   """
 
   def __init__(self,
                labeled_loss,
                adv_config,
                raise_invalid_gradient=False,
-               gradient_tape=None):
+               gradient_tape=None,
+               pgd_model_fn=None,
+               pgd_loss_fn=None):
     self._labeled_loss = labeled_loss
     self._adv_config = adv_config
     self._raise_invalid_gradient = raise_invalid_gradient
     self._gradient_tape = gradient_tape
+    self._pgd_model_fn = pgd_model_fn
+    self._pgd_loss_fn = pgd_loss_fn
 
-  def _compute_gradient(self, dense_features):
-    """Computes the gradient of `self._labeled_loss` w.r.t. `dense_features`."""
+  def _compute_gradient(self, loss, dense_features, gradient_tape=None):
+    """Computes the gradient given a loss and dense features."""
     feature_values = list(dense_features.values())
-    if self._gradient_tape is None:  # Assuming in graph mode, no tape required.
-      grads = tf.gradients(self._labeled_loss, feature_values)
+    if gradient_tape is None:
+      grads = tf.gradients(loss, feature_values)
     else:
-      grads = self._gradient_tape.gradient(self._labeled_loss, feature_values)
+      grads = gradient_tape.gradient(loss, feature_values)
+
     # The order of elements returned by .values() and .keys() are guaranteed
     # corresponding to each other.
     keyed_grads = dict(zip(dense_features.keys(), grads))
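The refactored helper threads `loss` and `gradient_tape` through explicitly so the PGD loop below can recompute gradients on each iteration. A hedged eager-mode illustration of the same dict plumbing (toy feature dict, not the library's API):

```
import tensorflow as tf

dense_features = {'age': tf.constant([[30.0]]), 'bmi': tf.constant([[22.5]])}
with tf.GradientTape() as tape:
  tape.watch(list(dense_features.values()))
  # Stand-in loss over all features, just to have something to differentiate.
  loss = tf.add_n([tf.reduce_sum(t) for t in dense_features.values()])
grads = tape.gradient(loss, list(dense_features.values()))
# .keys() and .values() iterate in the same order on an unmodified dict,
# so zipping them back together is safe.
keyed_grads = dict(zip(dense_features.keys(), grads))
```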
@@ -131,7 +149,7 @@ def _split_dict(self, dictionary, predicate_fn):
         negatives[key] = value
     return positives, negatives
 
-  def gen_neighbor(self, input_features):
+  def gen_neighbor(self, input_features, pgd_labels=None):
     """Generates adversarial neighbors and the corresponding weights.
 
     This function perturbs only *dense* tensors to generate adversarial
@@ -148,6 +166,9 @@ def gen_neighbor(self, input_features):
         tensor(s) should be either:
         (a) pointwise samples: [batch_size, feat_len], or
         (b) sequence samples: [batch_size, seq_len, feat_len]
+      pgd_labels: the labels corresponding to each input. This should have
+        shape `[batch_size, 1]`. This is required for PGD-generated
+        adversaries, and unused otherwise.
 
     Returns:
       adv_neighbor: the perturbed example, with the same shape and structure as
@@ -163,41 +184,74 @@ def gen_neighbor(self, input_features):
       This error is suppressed if `raise_invalid_gradient` is set to False
       (which is the default).
     """
+    loss = self._labeled_loss
+    gradient_tape = self._gradient_tape
 
     # Composes both features and feature_masks to dictionaries, so that the
     # feature_masks can be looked up by key.
     features = self._compose_as_dict(input_features)
+    dense_original_features, sparse_original_features = self._split_dict(
+        features, lambda feature: isinstance(feature, tf.Tensor))
     feature_masks = self._compose_as_dict(self._adv_config.feature_mask)
     feature_min = self._compose_as_dict(self._adv_config.clip_value_min)
     feature_max = self._compose_as_dict(self._adv_config.clip_value_max)
-
-    dense_features, sparse_features = self._split_dict(
-        features, lambda feature: isinstance(feature, tf.Tensor))
-    if sparse_features:
-      sparse_keys = str(sparse_features.keys())
+    if sparse_original_features:
+      sparse_keys = str(sparse_original_features.keys())
       if self._raise_invalid_gradient:
         raise ValueError('Cannot perturb non-Tensor input: ' + sparse_keys)
       logging.log_first_n(logging.WARNING,
                           'Cannot perturb non-Tensor input: %s', 1, sparse_keys)
-
-    keyed_grads = self._compute_gradient(dense_features)
-    masked_grads = {
-        key: utils.apply_feature_mask(grad, feature_masks.get(key, None))
-        for key, grad in keyed_grads.items()
-    }
-
-    unit_perturbations = utils.maximize_within_unit_norm(
-        masked_grads, self._adv_config.adv_grad_norm)
-    perturbations = tf.nest.map_structure(
-        lambda t: t * self._adv_config.adv_step_size, unit_perturbations)
-
-    # Sparse features are copied directly without perturbation.
-    adv_neighbor = dict(sparse_features)
-    for key, feature in dense_features.items():
-      adv_neighbor[key] = tf.stop_gradient(
-          _apply_feature_constraints(
-              feature + perturbations[key] if key in perturbations else feature,
-              feature_min.get(key, None), feature_max.get(key, None)))
+    dense_features = dense_original_features
+    for t in range(self._adv_config.iterations):
+      keyed_grads = self._compute_gradient(loss, dense_features, gradient_tape)
+      masked_grads = {
+          key: utils.apply_feature_mask(grad, feature_masks.get(key, None))
+          for key, grad in keyed_grads.items()
+      }
+
+      unit_perturbations = utils.maximize_within_unit_norm(
+          masked_grads, self._adv_config.adv_grad_norm)
+      perturbations = tf.nest.map_structure(
+          lambda t: t * self._adv_config.adv_step_size, unit_perturbations)
+      # Clip perturbations into the epsilon ball here. Note that this ball is
+      # centered around the original input point.
+      diff = {}
+      bounded_diff = {}
+      for key, perturb in perturbations.items():
+        # Only include features for which perturbation occurred. There is
+        # nothing to project for features without perturbations.
+        diff[key] = dense_features[key] + perturb - dense_original_features[key]
+      if self._adv_config.epsilon is not None:
+        bounded_diff = utils.project_to_ball(diff, self._adv_config.epsilon,
+                                             self._adv_config.adv_grad_norm)
+      else:
+        bounded_diff = diff
+      # Backfill the rest of the dense features.
+      for key, feature in dense_features.items():
+        if key not in bounded_diff:
+          bounded_diff[key] = feature - dense_original_features[key]
+      adv_neighbor = dict(sparse_original_features)
+      for key, feature in dense_original_features.items():
+        adv_neighbor[key] = tf.stop_gradient(
+            _apply_feature_constraints(
+                feature + bounded_diff[key] if key in perturbations else feature,
+                feature_min.get(key, None), feature_max.get(key, None)))
+
+      # Update for the next iteration.
+      if t < self._adv_config.iterations - 1:
+        inputs_t = self._decompose_as(input_features, adv_neighbor)
+        # Compute the new loss to calculate gradients with.
+        features = self._compose_as_dict(inputs_t)
+        dense_features, _ = self._split_dict(
+            features, lambda feature: isinstance(feature, tf.Tensor))
+        if gradient_tape is not None:
+          with gradient_tape:
+            # Gradient calculated against dense features only.
+            gradient_tape.watch(dense_features)
+            loss = self._pgd_loss_fn(pgd_labels, self._pgd_model_fn(inputs_t))
+        else:
+          loss = self._pgd_loss_fn(pgd_labels, self._pgd_model_fn(inputs_t))
 
     # Converts the perturbed examples back to their original structure.
     adv_neighbor = self._decompose_as(input_features, adv_neighbor)
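The projection above bounds the accumulated perturbation `diff`, which is measured from the original input rather than from the previous iterate, so repeated steps cannot drift outside the epsilon ball. A small standalone sketch of the idea for the L2 norm; `project_l2` is a hypothetical stand-in for `utils.project_to_ball`, whose behavior is only inferred from the call site:

```
import tensorflow as tf

def project_l2(diff, epsilon):
  # If the perturbation's L2 norm exceeds epsilon, scale it back onto the
  # ball; otherwise leave it unchanged.
  norm = tf.norm(diff)
  return diff * tf.minimum(1.0, epsilon / tf.maximum(norm, 1e-12))

x_orig = tf.constant([0.0, 0.0])
x_adv = tf.constant([3.0, 4.0])  # accumulated perturbation has norm 5.0
bounded = x_orig + project_l2(x_adv - x_orig, epsilon=1.0)
# bounded == [0.6, 0.8]: same direction, length clipped to epsilon.
```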
@@ -212,7 +266,10 @@ def gen_adv_neighbor(input_features,
                      labeled_loss,
                      config,
                      raise_invalid_gradient=False,
-                     gradient_tape=None):
+                     gradient_tape=None,
+                     pgd_model_fn=None,
+                     pgd_loss_fn=None,
+                     pgd_labels=None):
   """Generates adversarial neighbors for the given input and loss.
 
   This function implements the following operation:
@@ -225,10 +282,10 @@ def gen_adv_neighbor(input_features,
       dictionary of feature names and dense tensors. The shape of the tensor(s)
       should be either:
       (a) pointwise samples: `[batch_size, feat_len]`, or
-      (b) sequence samples: `[batch_size, seq_len, feat_len]`.
-      Note that only dense (`float`) tensors in `input_features` will be
-      perturbed and all other features (`int`, `string`, or `SparseTensor`) will
-      be kept as-is in the returning `adv_neighbor`.
+      (b) sequence samples: `[batch_size, seq_len, feat_len]`. Note that only
+      dense (`float`) tensors in `input_features` will be perturbed and all
+      other features (`int`, `string`, or `SparseTensor`) will be kept as-is
+      in the returned `adv_neighbor`.
     labeled_loss: A scalar tensor of floating point type calculated from true
       labels (or supervisions).
     config: A `nsl.configs.AdvNeighborConfig` object containing the following
@@ -238,15 +295,21 @@ def gen_adv_neighbor(input_features,
       - 'adv_step_size': the step size to find the adversarial sample.
       - 'adv_grad_norm': type of tensor norm to normalize the gradient.
     raise_invalid_gradient: (optional) A Boolean flag indicating whether to
-      raise an error when gradients cannot be computed on any input feature.
-      There are three cases where this error may happen:
-      (1) The feature is a `SparseTensor`.
-      (2) The feature has a non-differentiable `dtype`, like string or integer.
-      (3) The feature is not involved in loss computation.
-      If set to `False` (default), those inputs without gradient will be ignored
-      silently and not perturbed.
+      raise an error when gradients cannot be computed on any input feature.
+      There are three cases where this error may happen: (1) The feature is a
+      `SparseTensor`. (2) The feature has a non-differentiable `dtype`, like
+      string or integer. (3) The feature is not involved in loss computation.
+      If set to `False` (default), those inputs without gradient will be
+      ignored silently and not perturbed.
     gradient_tape: A `tf.GradientTape` object watching the calculation from
       `input_features` to `labeled_loss`. Can be omitted if running in graph
       mode.
+    pgd_model_fn: The model to generate adversaries for. Generates predictions
+      for a given set of inputs, in the shape of `input_features`.
+    pgd_loss_fn: The loss function. Takes labels and model predictions, and
+      returns the loss.
+    pgd_labels: Labels for the input features. This should have shape
+      `[batch_size, 1]`. Required to generate adversaries with PGD, unused
+      otherwise.
 
   Returns:
@@ -259,6 +322,11 @@ def gen_adv_neighbor(input_features,
     features cannot be perturbed. See `raise_invalid_gradient` for situations
     where this can happen.
   """
-  adv_helper = _GenAdvNeighbor(labeled_loss, config, raise_invalid_gradient,
-                               gradient_tape)
-  return adv_helper.gen_neighbor(input_features)
+  adv_helper = _GenAdvNeighbor(
+      labeled_loss,
+      config,
+      raise_invalid_gradient,
+      gradient_tape,
+      pgd_model_fn=pgd_model_fn,
+      pgd_loss_fn=pgd_loss_fn)
+  return adv_helper.gen_neighbor(input_features, pgd_labels)
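Putting it together, a hedged usage sketch of the extended entry point. The keyword arguments mirror this diff; the `AdvNeighborConfig` construction and the two-element return (neighbor plus weight, per the class docstring's "neighbors and the corresponding weights") are assumptions about the surrounding NSL API, and the PGD config fields `iterations`/`epsilon` read elsewhere in this diff are not shown:

```
import neural_structured_learning as nsl
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

x = tf.random.normal([8, 4])
labels = tf.cast(
    tf.random.uniform([8, 1], maxval=2, dtype=tf.int32), tf.float32)

with tf.GradientTape() as tape:
  tape.watch(x)
  loss = loss_fn(labels, model(x))

adv_config = nsl.configs.AdvNeighborConfig(
    adv_step_size=0.05, adv_grad_norm='l2')

# With the PGD arguments the helper runs the multi-step loop; without them
# it falls back to the original single-step behavior.
adv_x, _ = nsl.lib.gen_adv_neighbor(
    x, loss, adv_config,
    gradient_tape=tape,
    pgd_model_fn=model,
    pgd_loss_fn=loss_fn,
    pgd_labels=labels)
```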