@@ -975,17 +975,20 @@ def load_diabetes(raw: bool = False, test_set: float = 0.3) -> DATASET_TYPE:
     return (x_train, y_train), (x_test, y_test), min_, max_


-def load_nursery(raw: bool = False, test_set: float = 0.2, transform_social: bool = False) -> DATASET_TYPE:
+def load_nursery(
+    raw: bool = False, scaled: bool = True, test_set: float = 0.2, transform_social: bool = False
+) -> DATASET_TYPE:
     """
     Loads the UCI Nursery dataset from `config.ART_DATA_PATH` or downloads it if necessary.

     :param raw: `True` if no preprocessing should be applied to the data. Otherwise, categorical data is one-hot
-                encoded and data is scaled using sklearn's StandardScaler.
+                encoded and data is scaled using sklearn's StandardScaler according to the value of `scaled`.
+    :param scaled: `True` if data should be scaled.
     :param test_set: Proportion of the data to use as validation split. The value should be between 0 and 1.
     :param transform_social: If `True`, transforms the social feature to be binary for the purpose of attribute
                              inference. This is done by assigning the original value 'problematic' the new value 1, and
                              the other original values are assigned the new value 0.
-    :return: Entire dataset and labels.
+    :return: Entire dataset and labels as numpy arrays.
     """
     import pandas as pd
     import sklearn.preprocessing
@@ -1050,16 +1053,20 @@ def modify_social(value):
     data = data.drop(features_to_remove, axis=1)

     # normalize data
-    label = data.loc[:, "label"]
-    features = data.drop(["label"], axis=1)
-    scaler = sklearn.preprocessing.StandardScaler()
-    scaler.fit(features)
-    scaled_features = pd.DataFrame(scaler.transform(features), columns=features.columns)
-
-    data = pd.concat([label, scaled_features], axis=1, join="inner")
+    if scaled:
+        label = data.loc[:, "label"]
+        features = data.drop(["label"], axis=1)
+        scaler = sklearn.preprocessing.StandardScaler()
+        scaler.fit(features)
+        scaled_features = pd.DataFrame(scaler.transform(features), columns=features.columns)
+        data = pd.concat([label, scaled_features], axis=1, join="inner")

     features = data.drop(["label"], axis=1)
-    min_, max_ = np.amin(features.to_numpy()), np.amax(features.to_numpy())
+    if raw:
+        numeric_features = features.drop(categorical_features, axis=1).to_numpy().astype(np.int32)
+        min_, max_ = np.amin(numeric_features), np.amax(numeric_features)
+    else:
+        min_, max_ = np.amin(features.to_numpy().astype(np.float64)), np.amax(features.to_numpy().astype(np.float64))

     # Split training and test sets
     stratified = sklearn.model_selection.StratifiedShuffleSplit(n_splits=1, test_size=test_set, random_state=18)
@@ -1071,6 +1078,10 @@ def modify_social(value):
     x_test = test.drop(["label"], axis=1).to_numpy()
     y_test = test.loc[:, "label"].to_numpy()

+    if not raw and not scaled:
+        x_train = x_train.astype(np.float64)
+        x_test = x_test.astype(np.float64)
+
     return (x_train, y_train), (x_test, y_test), min_, max_

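A minimal usage sketch of the updated loader, assuming it is exposed as art.utils.load_nursery as in ART's public utilities; the import path and the flag combinations shown below are illustrative assumptions, not part of this commit:

from art.utils import load_nursery

# Default behaviour: categorical features are one-hot encoded and all features
# are scaled with sklearn's StandardScaler.
(x_train, y_train), (x_test, y_test), min_, max_ = load_nursery(test_set=0.2)

# New in this change: keep the one-hot encoding but skip scaling. Features are
# returned as float64 and min_/max_ reflect the unscaled values.
(x_train, y_train), (x_test, y_test), min_, max_ = load_nursery(scaled=False)

# Raw mode: no encoding or scaling; min_/max_ are computed from the numeric
# (non-categorical) columns only.
(x_train_raw, y_train_raw), (x_test_raw, y_test_raw), min_raw, max_raw = load_nursery(raw=True)

Keeping scaled separate from raw allows the one-hot encoded but unscaled case, which is the combination handled by the new float64 cast at the end of the function.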