Skip to content

Commit b3bad7f

Browse files
authored
Merge pull request #2720 from nicholasadriel/one-pixel-shortcut-attack
Add OnePixelShortcutAttack poisoning attack and its unit tests
2 parents 293bd22 + 8e7421b commit b3bad7f

File tree

3 files changed

+410
-0
lines changed

3 files changed

+410
-0
lines changed

art/attacks/poisoning/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@
1919
from art.attacks.poisoning.hidden_trigger_backdoor.hidden_trigger_backdoor_pytorch import HiddenTriggerBackdoorPyTorch
2020
from art.attacks.poisoning.hidden_trigger_backdoor.hidden_trigger_backdoor_keras import HiddenTriggerBackdoorKeras
2121
from art.attacks.poisoning.sleeper_agent_attack import SleeperAgentAttack
22+
from art.attacks.poisoning.one_pixel_shortcut_attack import OnePixelShortcutAttack
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
# MIT License
2+
#
3+
# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2025
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6+
# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
7+
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
8+
# persons to whom the Software is furnished to do so, subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
11+
# Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14+
# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
15+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
16+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
17+
# SOFTWARE.
18+
"""
19+
This module implements One Pixel Shortcut attacks on Deep Neural Networks.
20+
"""
21+
22+
23+
from __future__ import annotations
24+
25+
import numpy as np
26+
27+
from art.attacks.attack import PoisoningAttackBlackBox
28+
29+
30+
class OnePixelShortcutAttack(PoisoningAttackBlackBox):
    """
    One-Pixel Shortcut (OPS) poisoning attack.

    For each class, the attack searches for a single pixel position and an extreme colour (every channel either 0
    or 1) that maximises a mean-minus-variance objective over that class's images. The selected colour is then
    written at the selected position in every image of the class (labels remain unchanged).
    Reference: Wu et al. (ICLR 2023).

    | Paper link: https://arxiv.org/abs/2205.12141
    """

    attack_params: list = []  # No external parameters for this attack
    _estimator_requirements: tuple = ()

    def __init__(self) -> None:
        """
        Create a One-Pixel Shortcut attack instance. The attack takes no parameters.
        """
        super().__init__()

    def _check_params(self) -> None:
        """
        Validate attack parameters. This attack has none, so there is nothing to check.
        """

    def poison(self, x: np.ndarray, y: np.ndarray | None = None, **kwargs) -> tuple[np.ndarray, np.ndarray]:
        """
        Generate an OPS-poisoned dataset from clean data.

        The layout of 4-D input is guessed from its last dimension: if it is 1, 3 or 4 the input is treated as
        (N, H, W, C), otherwise as (N, C, H, W). 3-D input is treated as single-channel images (N, H, W).

        :param x: Clean input samples, as a Numpy array of shape (N, H, W), (N, H, W, C) or (N, C, H, W), with values
                  in [0, 1].
        :param y: Corresponding labels (shape (N,) or one-hot (N, K)). Required for class-wise perturbation.
        :param kwargs: Unused; accepted for interface compatibility.
        :return: Tuple (x_poisoned, y_poisoned). `x_poisoned` has the same shape and dtype as `x`, with one pixel
                 modified per image; `y_poisoned` is a copy of `y`.
        :raises ValueError: If `y` is missing, if `x` is neither 3-D nor 4-D, or if `y` does not provide exactly one
                            label per sample of `x`.
        """
        if y is None:
            raise ValueError("Labels y must be provided for the One-Pixel Shortcut attack.")

        # `np.asarray` only copies when required; `np.array(..., copy=False)` raises on NumPy >= 2.0 in that case.
        x_array = np.asarray(x)
        y_array = np.asarray(y)

        # Labels are returned untouched (as a copy) - poisoning only alters the images.
        y_poison = y_array.copy()

        # Bring the data into a float32 (N, H, W, C) working buffer. `astype` always copies here, so the buffer can
        # be modified freely without touching the caller's array.
        squeeze_channel = False
        channels_first = False
        if x_array.ndim == 3:
            # (N, H, W): single-channel images without an explicit channel axis.
            x_poison = x_array[..., np.newaxis].astype(np.float32)
            squeeze_channel = True
        elif x_array.ndim == 4:
            # A trailing dimension of 1, 3 or 4 (grayscale, RGB, RGBA) is taken as the channel axis, including the
            # ambiguous case where dimension 1 would also qualify; anything else is assumed to be (N, C, H, W).
            channels_first = x_array.shape[-1] not in (1, 3, 4)
            channels_last_view = np.transpose(x_array, (0, 2, 3, 1)) if channels_first else x_array
            x_poison = channels_last_view.astype(np.float32)
        else:
            raise ValueError(f"Unsupported input tensor shape: {x_array.shape}")

        n, h, w, c = x_poison.shape

        # Reduce one-hot labels to class indices.
        labels = y_array.argmax(axis=1) if y_array.ndim > 1 else y_array
        labels = labels.astype(int)
        if labels.ndim != 1 or labels.shape[0] != n:
            raise ValueError(
                f"Labels y must provide exactly one label per sample: got labels of shape {y_array.shape} "
                f"for {n} samples."
            )

        # Candidate colours are the corners of the unit colour cube: each channel is either 0 or 1.
        candidates = [np.array(bits, dtype=x_poison.dtype) for bits in np.ndindex(*([2] * c))]

        for cls in np.unique(labels):
            idx = np.flatnonzero(labels == cls)
            # Fancy indexing yields a copy of this class's clean images. Classes are disjoint, so writing the result
            # back into the shared buffer cannot influence the search for any other class.
            shortcut = self._find_shortcut_pixel(x_poison[idx], candidates)
            if shortcut is not None:
                (row, col), color = shortcut
                x_poison[idx, row, col, :] = color

        # Restore the caller's layout, shape and dtype.
        if channels_first:
            x_poison = np.transpose(x_poison, (0, 3, 1, 2))
        if squeeze_channel:
            # Only drop the channel axis when the input did not have one; 4-D single-channel input keeps its shape.
            x_poison = x_poison.reshape(n, h, w)
        return x_poison.astype(x_array.dtype, copy=False), y_poison

    @staticmethod
    def _find_shortcut_pixel(
        images: np.ndarray, candidates: list[np.ndarray]
    ) -> tuple[tuple[int, int], np.ndarray] | None:
        """
        Find the pixel position and colour with the highest mean-minus-variance score for one class.

        :param images: Images of a single class, shape (n_c, H, W, C).
        :param candidates: Candidate colour vectors, each of shape (C,).
        :return: ((row, col), colour) of the best-scoring candidate, or `None` if no candidate achieved a score
                 greater than negative infinity (e.g. all scores are NaN).
        """
        best_score = -np.inf
        best = None
        for color in candidates:
            # Per-image distance to the candidate colour at every pixel, averaged over channels: (n_c, H, W).
            per_image_diff = np.abs(images - color).mean(axis=3)
            # A good shortcut pixel is far from the candidate colour on average (large visible change) and
            # consistently so across the class (low variance).
            score_map = per_image_diff.mean(axis=0) - per_image_diff.var(axis=0)
            flat_index = int(np.argmax(score_map))
            score = float(score_map.flat[flat_index])
            if score > best_score:
                best_score = score
                row, col = np.unravel_index(flat_index, score_map.shape)
                best = ((int(row), int(col)), color)
        return best

0 commit comments

Comments
 (0)