@@ -1943,22 +1943,6 @@ def _distance(self, x0, x):
 
 
 class BrendelBethgeAttack(EvasionAttack):
-
-    attack_params = EvasionAttack.attack_params + [
-        "norm",
-        "targeted",
-        "init_attack",
-        "overshoot",
-        "steps",
-        "lr",
-        "lr_decay",
-        "lr_num_decay",
-        "momentum",
-        "binary_search_steps",
-        "init_size",
-    ]
-    _estimator_requirements = (BaseEstimator, LossGradientsMixin, ClassifierMixin)
-
     """
     Base class for the Brendel & Bethge adversarial attack [#Bren19]_, a powerful gradient-based adversarial attack that
     follows the adversarial boundary (the boundary between the space of adversarial and non-adversarial images as
@@ -1968,43 +1952,34 @@ class BrendelBethgeAttack(EvasionAttack):
     https://github.com/bethgelab/foolbox/blob/master/foolbox/attacks/brendel_bethge.py.
 
     Implementation differs from the attack used in the paper in two ways:
+
     * The initial binary search is always using the full 10 steps (for ease of implementation).
     * The adaptation of the trust region over the course of optimisation is less
       greedy but is more robust, reliable and simpler (decay every K steps)
 
-    Args:
-        estimator: A trained ART classifier providing loss gradients.
-        norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2.
-        targeted: Flag determining if attack is targeted.
-        overshoot: If 1 the attack tries to return exactly to the adversarial boundary
-            in each iteration. For higher values the attack tries to overshoot
-            over the boundary to ensure that the perturbed sample in each iteration
-            is adversarial.
-        steps: Maximum number of iterations to run. Might converge and stop
-            before that.
-        lr: Trust region radius, behaves similar to a learning rate. Smaller values
-            decrease the step size in each iteration and ensure that the attack
-            follows the boundary more faithfully.
-        lr_decay: The trust region lr is multiplied with lr_decay in regular intervals (see
-            lr_num_decay).
-        lr_num_decay: Number of learning rate decays in regular intervals of
-            length steps / lr_num_decay.
-        momentum: Averaging of the boundary estimation over multiple steps. A momentum of
-            zero would always take the current estimate while values closer to one
-            average over a larger number of iterations.
-        binary_search_steps: Number of binary search steps used to find the adversarial boundary
-            between the starting point and the clean image.
-        batch_size: Batch size for evaluating the model for predictions and gradients.
-        init_size: Maximum number of random search steps to find initial adversarial example.
-
     References:
-        .. [#Bren19] Wieland Brendel, Jonas Rauber, Matthias Kümmerer,
-            Ivan Ustyuzhaninov, Matthias Bethge,
+        .. [#Bren19] Wieland Brendel, Jonas Rauber, Matthias Kümmerer, Ivan Ustyuzhaninov, Matthias Bethge,
             "Accurate, reliable and fast robustness evaluation",
             33rd Conference on Neural Information Processing Systems (2019)
             https://arxiv.org/abs/1907.01003
     """
 
+    attack_params = EvasionAttack.attack_params + [
+        "norm",
+        "targeted",
+        "init_attack",
+        "overshoot",
+        "steps",
+        "lr",
+        "lr_decay",
+        "lr_num_decay",
+        "momentum",
+        "binary_search_steps",
+        "init_size",
+    ]
+
+    _estimator_requirements = (BaseEstimator, LossGradientsMixin, ClassifierMixin)
+
     def __init__(
         self,
         estimator: "CLASSIFIER_LOSS_GRADIENTS_TYPE",
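
The class docstring above notes that the trust region is adapted by decaying `lr` in regular intervals of length `steps / lr_num_decay`, multiplying by `lr_decay` each time ("decay every K steps"). A minimal sketch of that schedule, with illustrative values only and not the attack's internal implementation:

```python
# Illustrative sketch of the decay-every-K-steps trust region schedule described
# in the docstring. The concrete values below are assumptions for demonstration.
steps = 1000
lr = 1e-3
lr_decay = 0.5
lr_num_decay = 20

decay_interval = steps // lr_num_decay  # K = 50: decay once every 50 steps
for step in range(1, steps + 1):
    if step % decay_interval == 0:
        lr *= lr_decay  # the trust region radius shrinks lr_num_decay times overall
```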
@@ -2020,6 +1995,25 @@ def __init__(
         init_size: int = 100,
         batch_size: int = 32,
     ):
+        """
+        :param estimator: A trained ART classifier providing loss gradients.
+        :param norm: The norm of the adversarial perturbation. Possible values: "inf", np.inf, 1 or 2.
+        :param targeted: Flag determining if attack is targeted.
+        :param overshoot: If 1 the attack tries to return exactly to the adversarial boundary in each iteration. For
+                          higher values the attack tries to overshoot over the boundary to ensure that the perturbed
+                          sample in each iteration is adversarial.
+        :param steps: Maximum number of iterations to run. Might converge and stop before that.
+        :param lr: Trust region radius, behaves similar to a learning rate. Smaller values decrease the step size in
+                   each iteration and ensure that the attack follows the boundary more faithfully.
+        :param lr_decay: The trust region lr is multiplied with lr_decay in regular intervals (see lr_num_decay).
+        :param lr_num_decay: Number of learning rate decays in regular intervals of length steps / lr_num_decay.
+        :param momentum: Averaging of the boundary estimation over multiple steps. A momentum of zero would always take
+                         the current estimate while values closer to one average over a larger number of iterations.
+        :param binary_search_steps: Number of binary search steps used to find the adversarial boundary between the
+                                    starting point and the clean image.
+        :param init_size: Maximum number of random search steps to find initial adversarial example.
+        :param batch_size: Batch size for evaluating the model for predictions and gradients.
+        """
         from art.estimators.classification import TensorFlowV2Classifier, PyTorchClassifier
 
         if isinstance(estimator, TensorFlowV2Classifier):
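
For reference, a minimal usage sketch of the constructor documented above, assuming the standard `art.attacks.evasion` import path and an already-trained ART `classifier`; the numeric values are illustrative rather than confirmed defaults:

```python
import numpy as np

from art.attacks.evasion import BrendelBethgeAttack  # assumed import path

# `classifier` is assumed to be a trained ART classifier providing loss gradients,
# e.g. a PyTorchClassifier or TensorFlowV2Classifier wrapping an existing model.
attack = BrendelBethgeAttack(
    estimator=classifier,
    norm=np.inf,              # "inf", np.inf, 1 or 2
    targeted=False,
    overshoot=1.1,            # values > 1 push iterates slightly past the boundary
    steps=1000,
    lr=1e-3,                  # trust region radius
    lr_decay=0.5,
    lr_num_decay=20,
    momentum=0.8,
    binary_search_steps=10,
    init_size=100,
    batch_size=32,
)
```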
@@ -2170,22 +2164,25 @@ def logits_difference(y_pred, y_true): # type: ignore
         else:
             self.theta = 0.01 / np.prod(self.estimator.input_shape)
 
-    def generate(  # pylint: disable=W0221
+    def generate(
         self,
         x: np.ndarray,
         y: Optional[np.ndarray] = None,
-        starting_points: Optional[np.ndarray] = None,
-        early_stop: Optional[float] = None,
         **kwargs,
     ) -> np.ndarray:
         """
         Applies the Brendel & Bethge attack.
 
         :param x: The original clean inputs.
         :param y: The labels for inputs `x`.
-        :param starting_points: Adversarial inputs to use as a starting points, in particular for targeted attacks.
-        :param early_stop: Early-stopping criteria.
+
+        :Keyword Arguments:
+            * *starting_points* (``np.ndarray``)
+              Optional. Adversarial inputs to use as starting points, in particular for targeted attacks.
         """
+        starting_points = kwargs.get("starting_points")
+        # early_stop = kwargs.get("early_stop")
+
         originals = x.copy()
 
         y = check_and_transform_label_format(y, self.estimator.nb_classes)
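
With `starting_points` moved into `**kwargs`, the call site changes only in that the argument is now passed purely by keyword. A minimal sketch, assuming `attack` is the instance from the previous example and `x_test`, `y_test`, `x_start` are NumPy arrays of compatible shapes:

```python
# `x_start` is assumed to already be adversarial (e.g. real samples of the target
# class); it seeds the boundary search, which is especially useful for targeted attacks.
x_adv = attack.generate(x=x_test, y=y_test, starting_points=x_start)

# Without starting points, the attack falls back to its own initialisation
# (random search / init attack); the `early_stop` keyword is currently ignored,
# as the commented-out line in the diff shows.
x_adv_default = attack.generate(x=x_test, y=y_test)
```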