Skip to content

Commit acbf67d

Browse files
authored
Merge pull request #1470 from Trusted-AI/development_issue_1469
Preprocessing defences in PyTorchGoturn and VideoCompression for [0, 1] range
2 parents 676b996 + a537934 commit acbf67d

File tree

3 files changed: +23 additions, -3 deletions

art/defences/preprocessor/video_compression.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import os
2828
from tempfile import TemporaryDirectory
2929
from typing import Optional, Tuple
30+
import warnings
3031

3132
import numpy as np
3233
from tqdm.auto import tqdm
@@ -78,7 +79,8 @@ def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> Tuple[np.nd
7879
"""
7980
Apply video compression to sample `x`.
8081
81-
:param x: Sample to compress of shape NCFHW or NFHWC. `x` values are expected to be in the data range [0, 255].
82+
:param x: Sample to compress of shape NCFHW or NFHWC. `x` values are expected to be either in range [0, 1] or
83+
[0, 255].
8284
:param y: Labels of the sample `x`. This function does not affect them in any way.
8385
:return: Compressed sample.
8486
"""
@@ -92,6 +94,9 @@ def compress_video(x: np.ndarray, video_format: str, constant_rate_factor: int,
9294
video_path = os.path.join(dir_, f"tmp_video.{video_format}")
9395
_, height, width, _ = x.shape
9496

97+
if (height % 2) != 0 or (width % 2) != 0:
98+
warnings.warn("Codec might require even number of pixels in height and width.")
99+
95100
# numpy to local video file
96101
process = (
97102
ffmpeg.input("pipe:", format="rawvideo", pix_fmt="rgb24", s=f"{width}x{height}")
@@ -118,11 +123,19 @@ def compress_video(x: np.ndarray, video_format: str, constant_rate_factor: int,
118123
x = np.transpose(x, (0, 2, 3, 4, 1))
119124

120125
# apply video compression per video item
126+
scale = 1
127+
if x.min() >= 0 and x.max() <= 1.0:
128+
scale = 255
129+
121130
x_compressed = x.copy()
122131
with TemporaryDirectory(dir=config.ART_DATA_PATH) as tmp_dir:
123132
for i, x_i in enumerate(tqdm(x, desc="Video compression", disable=not self.verbose)):
133+
x_i *= scale
124134
x_compressed[i] = compress_video(x_i, self.video_format, self.constant_rate_factor, dir_=tmp_dir)
125135

136+
x_compressed = x_compressed / scale
137+
x_compressed = x_compressed.astype(x.dtype)
138+
126139
if self.channels_first:
127140
x_compressed = np.transpose(x_compressed, (0, 4, 1, 2, 3))
128141

art/defences/preprocessor/video_compression_pytorch.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,17 @@ def forward(
116116
"""
117117
Apply video compression to sample `x`.
118118
119-
:param x: Sample to compress of shape NCFHW or NFHWC. `x` values are expected to be in the data range [0, 255].
119+
:param x: Sample to compress of shape NCFHW or NFHWC. `x` values are expected to be either in range [0, 1] or
120+
[0, 255].
120121
:param y: Labels of the sample `x`. This function does not affect them in any way.
121122
:return: Compressed sample.
122123
"""
124+
scale = 1
125+
if x.min() >= 0 and x.max() <= 1.0:
126+
scale = 255
127+
x = x * scale
123128
x_compressed = self._compression_pytorch_numpy.apply(x)
129+
x_compressed = x_compressed / scale
124130
return x_compressed, y
125131

126132
def _check_params(self) -> None:

art/estimators/object_tracking/pytorch_goturn.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -670,10 +670,11 @@ def predict(self, x: np.ndarray, batch_size: int = 128, **kwargs) -> List[Dict[s
670670
x_i = x[i].to(self.device)
671671

672672
# Apply preprocessing
673+
x_i = torch.unsqueeze(x_i, dim=0)
673674
x_i, _ = self._apply_preprocessing(x_i, y=None, fit=False, no_grad=False)
675+
x_i = torch.squeeze(x_i)
674676

675677
y_pred = self._track(x=x_i, y_init=y_init[i])
676-
677678
prediction_dict = dict()
678679
if isinstance(x, np.ndarray):
679680
prediction_dict["boxes"] = y_pred.detach().cpu().numpy()

Commit comments: 0