
Commit 64db977

Updates to DETR: cleaned up resizing; corrected clipping. Updates to notebook: validated attack with PGD. Updates to test: corrected loss computation and PGD.
Signed-off-by: Kieran Fraser <[email protected]>
1 parent 478d9a7 commit 64db977

3 files changed: +325 additions, -419 deletions


art/estimators/object_detection/pytorch_detection_transformer.py

Lines changed: 91 additions & 51 deletions
@@ -169,6 +169,7 @@ def __init__(
                            between 0 and H and 0 and W
         - labels (Tensor[N]): the predicted labels for each image
         - scores (Tensor[N]): the scores or each prediction
+        :param input_shape: Tuple of the form `(height, width)` of ints representing input image height and width
        :param clip_values: Tuple of the form `(min, max)` of floats or `np.ndarray` representing the minimum and
               maximum values allowed for features. If floats are provided, these will be used as the range of all
               features. If arrays are provided, each value will be considered the bound for a feature, thus
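For orientation, a hypothetical construction sketch (not part of this commit; the argument values and channels-first shape are assumptions) showing how the newly documented `input_shape` and `clip_values` parameters feed the resizing and clipping behaviour updated here:

from art.estimators.object_detection import PyTorchDetectionTransformer

# Assumed values: a channels-first (C, H, W) shape matching DETR's 800x800 default
# and a [0, 1] float pixel range; consult the class defaults for the real values.
detr = PyTorchDetectionTransformer(
    input_shape=(3, 800, 800),
    clip_values=(0.0, 1.0),
)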
@@ -577,43 +578,10 @@ def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> List[Dict[s
                 - labels [N]: the labels for each image
                 - scores [N]: the scores or each prediction.
        """
-        import cv2
        import torch

-        # check if image with min, max dimensions, if not scale to 1000
-        # if is within min, max dims, but not square, resize to max of image
-        if (
-            self._input_shape[1] < self.MIN_IMAGE_SIZE
-            or self._input_shape[1] > self.MAX_IMAGE_SIZE
-            or self._input_shape[2] < self.MIN_IMAGE_SIZE
-            or self.input_shape[2] > self.MAX_IMAGE_SIZE
-        ):
-            resized_imgs = []
-            for i, _ in enumerate(x):
-                resized_imgs.append(
-                    cv2.resize(
-                        (x * 255)[i].transpose(1, 2, 0).astype(np.uint8),
-                        dsize=(1000, 1000),
-                        interpolation=cv2.INTER_CUBIC,
-                    )
-                )
-            x = (np.array(resized_imgs) / 255).transpose(0, 3, 1, 2).astype(np.float32)
-        elif self._input_shape[1] != self._input_shape[2]:
-            rescale_dim = max(self._input_shape[1], self._input_shape[2])
-            resized_imgs = []
-            for i, _ in enumerate(x):
-                resized_imgs.append(
-                    cv2.resize(
-                        (x * 255)[i].transpose(1, 2, 0).astype(np.uint8),
-                        dsize=(rescale_dim, rescale_dim),
-                        interpolation=cv2.INTER_CUBIC,
-                    )
-                )
-            x = (np.array(resized_imgs) / 255).transpose(0, 3, 1, 2).astype(np.float32)
-
-        x = x.copy()
-
        self._model.eval()
+        x, _ = self._apply_resizing(x, None)

        # Apply preprocessing
        x_preprocessed, _ = self._apply_preprocessing(x, y=None, fit=False)
@@ -633,7 +601,7 @@ def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> List[Dict[s
            predictions.append(
                {
                    "boxes": rescale_bboxes(
-                        model_output["pred_boxes"][i, :, :], (self._input_shape[1], self._input_shape[2])
+                        model_output["pred_boxes"][i, :, :], (self._input_shape[2], self._input_shape[1])
                    )
                    .detach()
                    .numpy(),
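The swapped tuple matters because `rescale_bboxes` takes a `(width, height)` size while `self._input_shape` is channels-first `(C, H, W)`, so index 2 is the width and index 1 the height. A minimal sketch of the helper, assuming it follows the public DETR reference implementation, is:

import torch

def box_cxcywh_to_xyxy(boxes: torch.Tensor) -> torch.Tensor:
    # (center_x, center_y, width, height) -> (x_min, y_min, x_max, y_max)
    x_c, y_c, w, h = boxes.unbind(-1)
    return torch.stack([x_c - 0.5 * w, y_c - 0.5 * h, x_c + 0.5 * w, y_c + 0.5 * h], dim=-1)

def rescale_bboxes(out_bbox: torch.Tensor, size: tuple) -> torch.Tensor:
    # Scale normalised cxcywh predictions to absolute xyxy pixel coordinates.
    img_w, img_h = size  # width first, then height
    return box_cxcywh_to_xyxy(out_bbox) * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)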
@@ -765,22 +733,8 @@ def loss_gradient(
                 - labels (Tensor[N]): the predicted labels for each image
        :return: Loss gradients of the same shape as `x`.
        """
-        import torch
-
-        _y = []
-        for target in y:
-            cxcy_norm = revert_rescale_bboxes(
-                torch.from_numpy(target["boxes"]), (self.input_shape[1], self.input_shape[2])
-            )
-            _y.append(
-                {
-                    "labels": torch.from_numpy(target["labels"]).type(torch.int64).to(self.device),
-                    "boxes": cxcy_norm.to(self.device),
-                    "scores": torch.from_numpy(target["scores"]).type(torch.float).to(self.device),
-                }
-            )
-
-        output, inputs_t, image_tensor_list_grad = self._get_losses(x=x, y=_y)
+        x, y = self._apply_resizing(x, y)
+        output, inputs_t, image_tensor_list_grad = self._get_losses(x=x, y=y)
        loss = sum(output[k] * self.weight_dict[k] for k in output.keys() if k in self.weight_dict)

        self._model.zero_grad()
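The gradient is taken through the weighted sum of the criterion's outputs. An illustrative computation (component names and weights follow the public DETR defaults of 1, 5 and 2; the estimator's `weight_dict` may differ):

import torch

output = {"loss_ce": torch.tensor(0.7), "loss_bbox": torch.tensor(0.2), "loss_giou": torch.tensor(0.4)}
weight_dict = {"loss_ce": 1.0, "loss_bbox": 5.0, "loss_giou": 2.0}

loss = sum(output[k] * weight_dict[k] for k in output if k in weight_dict)
print(float(loss))  # 0.7 * 1.0 + 0.2 * 5.0 + 0.4 * 2.0 = 2.5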
@@ -833,6 +787,7 @@ def compute_losses(
                 - scores (Tensor[N]): the scores or each prediction.
        :return: Dictionary of loss components.
        """
+        x, y = self._apply_resizing(x, y)
        output_tensor, _, _ = self._get_losses(x=x, y=y)
        output = {}
        for key, value in output_tensor.items():
@@ -859,6 +814,7 @@ def compute_loss(  # type: ignore
        """
        import torch

+        x, y = self._apply_resizing(x, y)
        output, _, _ = self._get_losses(x=x, y=y)

        # Compute the gradient and return
@@ -876,6 +832,90 @@ def compute_loss(  # type: ignore

        return loss.detach().cpu().numpy()

+    def _apply_resizing(self, x: Union[np.ndarray, "torch.Tensor"],
+                        y: List[Dict[str, Union[np.ndarray, "torch.Tensor"]]],
+                        height: int = 800,
+                        width: int = 800):
+        """
+        Resize the input and targets to dimensions expected by DETR.
+
+        :param x: Array or Tensor representing images of any size
+        :param y: List of targets to be transformed
+        :param height: Int representing desired height, the default is compatible with DETR
+        :param width: Int representing desired width, the default is compatible with DETR
+        """
+        import cv2
+        import torchvision.transforms as T
+        import torch
+
+        if (
+            self._input_shape[1] < self.MIN_IMAGE_SIZE
+            or self._input_shape[1] > self.MAX_IMAGE_SIZE
+            or self._input_shape[2] < self.MIN_IMAGE_SIZE
+            or self.input_shape[2] > self.MAX_IMAGE_SIZE
+        ):
+            resized_imgs = []
+            if isinstance(x, torch.Tensor):
+                x = T.Resize(size=(height, width))(x)
+            else:
+                for i, _ in enumerate(x):
+                    resized = cv2.resize(
+                        x[i].transpose(1, 2, 0),
+                        dsize=(height, width),
+                        interpolation=cv2.INTER_CUBIC,
+                    )
+                    resized = resized.transpose(2, 0, 1)
+                    resized_imgs.append(resized)
+                x = np.array(resized_imgs)
+
+        elif self._input_shape[1] != self._input_shape[2]:
+            rescale_dim = max(self._input_shape[1], self._input_shape[2])
+            resized_imgs = []
+            if isinstance(x, torch.Tensor):
+                x = T.Resize(size=(rescale_dim, rescale_dim))(x)
+            else:
+                for i, _ in enumerate(x):
+                    resized = cv2.resize(
+                        x[i].transpose(1, 2, 0),
+                        dsize=(rescale_dim, rescale_dim),
+                        interpolation=cv2.INTER_CUBIC,
+                    )
+                    resized = resized.transpose(2, 0, 1)
+                    resized_imgs.append(resized)
+                x = np.array(resized_imgs)
+
+        targets = []
+        if y is not None:
+            if isinstance(y[0]["boxes"], torch.Tensor):
+                for target in y:
+                    cxcy_norm = revert_rescale_bboxes(target["boxes"], (self.input_shape[2], self.input_shape[1]))
+                    targets.append(
+                        {
+                            "labels": target["labels"].type(torch.int64).to(self.device),
+                            "boxes": cxcy_norm.to(self.device),
+                            "scores": target["scores"].type(torch.float).to(self.device),
+                        }
+                    )
+            else:
+                for target in y:
+                    cxcy_norm = revert_rescale_bboxes(
+                        torch.from_numpy(target["boxes"]), (self.input_shape[2], self.input_shape[1])
+                    )
+                    targets.append(
+                        {
+                            "labels": torch.from_numpy(target["labels"]).type(torch.int64).to(self.device),
+                            "boxes": cxcy_norm.to(self.device),
+                            "scores": torch.from_numpy(target["scores"]).type(torch.float).to(self.device),
+                        }
+                    )
+
+        return x, targets

 class NestedTensor:
     """

notebooks/adversarial_patch/attack_adversarial_patch_detr.ipynb

Lines changed: 202 additions & 281 deletions
Large diffs are not rendered by default.
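The notebook's PGD validation is not shown in this diff. A minimal sketch of the validated attack flow, assuming an already-constructed estimator `detr` and a float32 NCHW batch `x` in [0, 1] (attack parameters are illustrative, not the notebook's actual settings):

from art.attacks.evasion import ProjectedGradientDescent

attack = ProjectedGradientDescent(estimator=detr, eps=8 / 255, eps_step=2 / 255, max_iter=10)

clean_preds = detr.predict(x)                # detections on the clean batch
x_adv = attack.generate(x=x, y=clean_preds)  # untargeted PGD using clean detections as labels
adv_preds = detr.predict(x_adv)              # detections after the attack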

tests/estimators/object_detection/test_pytorch_detection_transformer.py

Lines changed: 32 additions & 87 deletions
@@ -74,28 +74,18 @@ def test_predict(get_pytorch_detr):
    assert list(result[0].keys()) == ["boxes", "labels", "scores"]

    assert result[0]["boxes"].shape == (100, 4)
-    expected_detection_boxes = np.asarray([9.0386868e-03, 5.1708374e00, 7.4301929e00, 3.1964935e01])
+    expected_detection_boxes = np.asarray([-5.9490204e-03, 1.1947733e+01, 3.1993944e+01, 3.1925127e+01])
    np.testing.assert_array_almost_equal(result[0]["boxes"][2, :], expected_detection_boxes, decimal=3)

    assert result[0]["scores"].shape == (100,)
    expected_detection_scores = np.asarray(
-        [
-            0.00383973,
-            0.0167976,
-            0.01714019,
-            0.00073999,
-            0.00467391,
-            0.02399586,
-            0.00093301,
-            0.02143953,
-            0.00202136,
-            0.00266351,
-        ]
+        [0.00679839, 0.0250559, 0.07205943, 0.01115368, 0.03321039,
+         0.10407761, 0.00113309, 0.01442852, 0.00527624, 0.01240906]
    )
    np.testing.assert_array_almost_equal(result[0]["scores"][:10], expected_detection_scores, decimal=6)

    assert result[0]["labels"].shape == (100,)
-    expected_detection_classes = np.asarray([17, 17, 17, 3, 88, 17, 17, 17, 88, 17])
+    expected_detection_classes = np.asarray([17, 17, 33, 17, 17, 17, 74, 17, 17, 17])
    np.testing.assert_array_almost_equal(result[0]["labels"][:10], expected_detection_classes, decimal=6)
@@ -106,84 +96,31 @@ def test_loss_gradient(get_pytorch_detr):

    grads = object_detector.loss_gradient(x=x_test, y=y_test)

-    assert grads.shape == (2, 3, 32, 32)
+    assert grads.shape == (2, 3, 800, 800)

    expected_gradients1 = np.asarray(
-        [
-            0.04711548,
-            0.25275955,
-            0.3609573,
-            -0.02207462,
-            0.02886475,
-            0.05820496,
-            0.04151949,
-            -0.07008387,
-            0.24270807,
-            0.17703517,
-            -0.29346713,
-            -0.11548031,
-            -0.15658003,
-            -0.1412788,
-            0.02577158,
-            -0.00550455,
-            0.05846804,
-            -0.04419752,
-            0.06333683,
-            -0.15242189,
-            -0.06642783,
-            -0.09545745,
-            -0.01154867,
-            0.07477856,
-            0.05444539,
-            0.01678686,
-            0.01427085,
-            0.01382115,
-            -0.15745601,
-            -0.13278124,
-            0.06169066,
-            -0.03915803,
-        ]
+        [-0.00061366, 0.00322502, -0.00039866, -0.00807413, -0.00476555,
+         0.00181204, 0.01007765, 0.00415828, -0.00073114, 0.00018387,
+         -0.00146992, -0.00119636, -0.00098966, -0.00295517, -0.0024271,
+         -0.00131314, -0.00149217, -0.00104926, -0.00154239, -0.00110989,
+         0.00092887, 0.00049146, -0.00292508, -0.00124526, 0.00140347,
+         0.00019833, 0.00191074, -0.00117537, -0.00080604, 0.00057427,
+         -0.00061728, -0.00206535]
    )

-    np.testing.assert_array_almost_equal(grads[0, 0, 10, :], expected_gradients1, decimal=2)
+    np.testing.assert_array_almost_equal(grads[0, 0, 10, :32], expected_gradients1, decimal=2)

    expected_gradients2 = np.asarray(
-        [
-            -0.10913675,
-            0.00539385,
-            0.11588555,
-            0.02486979,
-            -0.23739402,
-            -0.01673118,
-            -0.09709811,
-            0.00763445,
-            0.10815062,
-            -0.3278629,
-            -0.23222731,
-            0.28806347,
-            -0.14222082,
-            -0.24168995,
-            -0.20170388,
-            -0.24570045,
-            -0.01220985,
-            -0.18616645,
-            -0.19678666,
-            -0.12424485,
-            -0.36253023,
-            0.08978511,
-            -0.02874891,
-            -0.09320692,
-            -0.26761073,
-            -0.34595487,
-            -0.34932154,
-            -0.21606845,
-            -0.07342689,
-            -0.0573133,
-            -0.04900078,
-            0.03462576,
-        ]
+        [-1.1787530e-03, -2.8500680e-03, 5.0884970e-03, 6.4504531e-04,
+         -6.8841036e-05, 2.8184296e-03, 3.0257765e-03, 2.8565727e-04,
+         -1.0701057e-04, 1.2945699e-03, 7.3593057e-04, 1.0177144e-03,
+         -2.4692707e-03, -1.3801848e-03, 6.3182280e-04, -4.2305476e-04,
+         4.4307750e-04, 8.5821096e-04, -7.1204413e-04, -3.1404425e-03,
+         -1.5964351e-03, -1.9222996e-03, -5.3157361e-04, -9.9202688e-04,
+         -1.5815455e-03, 2.0060266e-04, -2.0584739e-03, 6.6960667e-04,
+         9.7393827e-04, -1.6040013e-03, -6.9741381e-04, 1.4657658e-04]
    )
-    np.testing.assert_array_almost_equal(grads[1, 0, 10, :], expected_gradients2, decimal=2)
+    np.testing.assert_array_almost_equal(grads[1, 0, 10, :32], expected_gradients2, decimal=2)


 @pytest.mark.only_with_platform("pytorch")
@@ -251,7 +188,7 @@ def test_preprocessing_defences(get_pytorch_detr):
    # Compute gradients
    grads = object_detector.loss_gradient(x=x_test, y=y)

-    assert grads.shape == (2, 3, 32, 32)
+    assert grads.shape == (2, 3, 800, 800)


 @pytest.mark.only_with_platform("pytorch")
@@ -286,7 +223,7 @@ def test_compute_loss(get_pytorch_detr):
    # Compute loss
    loss = object_detector.compute_loss(x=x_test, y=y)

-    assert pytest.approx(63.9855, abs=0.01) == float(loss)
+    assert pytest.approx(3.9634, abs=0.01) == float(loss)


 @pytest.mark.only_with_platform("pytorch")
@@ -295,6 +232,14 @@ def test_pgd(get_pytorch_detr):
    object_detector, x_test, y_test = get_pytorch_detr

    from art.attacks.evasion import ProjectedGradientDescent
+    from PIL import Image
+
+    imgs = []
+    for i in x_test:
+        img = Image.fromarray((i * 255).astype(np.uint8).transpose(1, 2, 0))
+        img = img.resize(size=(800, 800))
+        imgs.append(np.array(img))
+    x_test = np.array(imgs).transpose(0, 3, 1, 2)

    attack = ProjectedGradientDescent(estimator=object_detector, max_iter=2)
    x_test_adv = attack.generate(x=x_test, y=y_test)
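A follow-up sanity check (not part of this commit) that the generated perturbation respects the estimator's clip values and actually shifts the detections could look like the sketch below, reusing the `object_detector`, `x_test` and `x_test_adv` names from the test above:

import numpy as np

# Adversarial batch keeps the original shape and stays within the clip range.
assert x_test_adv.shape == x_test.shape
clip_min, clip_max = object_detector.clip_values
assert np.all(x_test_adv >= clip_min) and np.all(x_test_adv <= clip_max)

# The attack should move the predictions away from the clean ones.
clean_preds = object_detector.predict(x_test)
adv_preds = object_detector.predict(x_test_adv)
assert not np.allclose(clean_preds[0]["scores"], adv_preds[0]["scores"])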
