Commit 8651b4c

Merge pull request #1210 from Trusted-AI/feature/pytorch-wrapper-mp3-defense
Pytorch wrapper MP3 defense
2 parents 5ed7e20 + 0ca45ab commit 8651b4c

File tree: 3 files changed, +150 / -5 lines

art/defences/preprocessor/__init__.py
art/defences/preprocessor/mp3_compression.py
art/defences/preprocessor/mp3_compression_pytorch.py

art/defences/preprocessor/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -3,10 +3,11 @@
 """
 from art.defences.preprocessor.feature_squeezing import FeatureSqueezing
 from art.defences.preprocessor.gaussian_augmentation import GaussianAugmentation
-from art.defences.preprocessor.inverse_gan import InverseGAN, DefenseGAN
+from art.defences.preprocessor.inverse_gan import DefenseGAN, InverseGAN
 from art.defences.preprocessor.jpeg_compression import JpegCompression
 from art.defences.preprocessor.label_smoothing import LabelSmoothing
 from art.defences.preprocessor.mp3_compression import Mp3Compression
+from art.defences.preprocessor.mp3_compression_pytorch import Mp3CompressionPyTorch
 from art.defences.preprocessor.pixel_defend import PixelDefend
 from art.defences.preprocessor.preprocessor import Preprocessor
 from art.defences.preprocessor.resample import Resample
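With this change the new PyTorch wrapper is exported next to the existing NumPy defence, so both can be imported from the package root. A minimal import sketch (not part of the diff):

# Both the NumPy defence and the new PyTorch wrapper are now importable from the package.
from art.defences.preprocessor import Mp3Compression, Mp3CompressionPyTorch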

art/defences/preprocessor/mp3_compression.py

Lines changed: 17 additions & 4 deletions
@@ -72,7 +72,7 @@ def __call__(self, x: np.ndarray, y: Optional[np.ndarray] = None) -> Tuple[np.nd
         Apply MP3 compression to sample `x`.
 
         :param x: Sample to compress with shape `(batch_size, length, channel)` or an array of sample arrays with shape
-                  (length,) or (length, channel). `x` values are recommended to be of type `np.int16`.
+                  (length,) or (length, channel).
         :param y: Labels of the sample `x`. This function does not affect them in any way.
         :return: Compressed sample.
         """
@@ -84,11 +84,12 @@ def wav_to_mp3(x, sample_rate):
             from pydub import AudioSegment
             from scipy.io.wavfile import write
 
+            x_dtype = x.dtype
             normalized = bool(x.min() >= -1.0 and x.max() <= 1.0)
-            if x.dtype != np.int16 and not normalized:
+            if x_dtype != np.int16 and not normalized:
                 # input is not of type np.int16 and seems to be unnormalized. Therefore casting to np.int16.
                 x = x.astype(np.int16)
-            elif x.dtype != np.int16 and normalized:
+            elif x_dtype != np.int16 and normalized:
                 # x is not of type np.int16 and seems to be normalized. Therefore undoing normalization and
                 # casting to np.int16.
                 x = (x * 2 ** 15).astype(np.int16)
@@ -100,7 +101,19 @@ def wav_to_mp3(x, sample_rate):
             tmp_wav.close()
             tmp_mp3.close()
             x_mp3 = np.array(audio_segment.get_array_of_samples()).reshape((-1, audio_segment.channels))
-            return x_mp3
+
+            # WARNING: Sometimes we *still* need to manually resize x_mp3 to original length.
+            # This should not be the case, e.g. see https://github.com/jiaaro/pydub/issues/474
+            if x.shape[0] != x_mp3.shape[0]:
+                logger.warning(
+                    "Lengths original input and compressed output don't match. Truncating compressed result."
+                )
+                x_mp3 = x_mp3[: x.shape[0]]
+
+            if normalized:
+                # x was normalized. Therefore normalizing x_mp3.
+                x_mp3 = x_mp3 * 2 ** -15
+            return x_mp3.astype(x_dtype)
 
         if x.dtype != np.object and x.ndim != 3:
             raise ValueError("Mp3 compression can only be applied to temporal data across at least one channel.")
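A minimal sketch of the behaviour these changes give `Mp3Compression` (not part of the diff; it assumes pydub and ffmpeg are installed, and the 16 kHz sine input is purely illustrative): the compressed output is truncated back to the input length if pydub returns extra samples, re-normalized if the input was normalized, and cast back to the input dtype instead of always coming back as raw `np.int16` samples.

import numpy as np

from art.defences.preprocessor import Mp3Compression

sample_rate = 16000  # illustrative value
t = np.linspace(0.0, 1.0, sample_rate, endpoint=False, dtype=np.float32)
# Normalized float input in [-1, 1] with shape (batch_size, length, channel).
x = (0.5 * np.sin(2.0 * np.pi * 440.0 * t)).reshape((1, -1, 1))

defence = Mp3Compression(sample_rate=sample_rate)
x_compressed, _ = defence(x)

# With this commit the result keeps the input's normalization and dtype.
print(x_compressed.shape, x_compressed.dtype)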
art/defences/preprocessor/mp3_compression_pytorch.py

Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,131 @@
+# MIT License
+#
+# Copyright (C) The Adversarial Robustness Toolbox (ART) Authors 2021
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
+# documentation files (the "Software"), to deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""
+This module implements a wrapper for MP3 compression defence.
+
+| Please keep in mind the limitations of defences. For details on how to evaluate classifier security in general,
+    see https://arxiv.org/abs/1902.06705.
+"""
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import logging
+from typing import TYPE_CHECKING, Optional, Tuple
+
+import numpy as np
+
+from art.defences.preprocessor.mp3_compression import Mp3Compression
+from art.defences.preprocessor.preprocessor import PreprocessorPyTorch
+
+logger = logging.getLogger(__name__)
+
+if TYPE_CHECKING:
+    # pylint: disable=C0412
+    import torch
+
+
+class Mp3CompressionPyTorch(PreprocessorPyTorch):
+    """
+    Implement the MP3 compression defense approach.
+    """
+
+    params = ["channels_first", "sample_rate", "verbose"]
+
+    def __init__(
+        self,
+        sample_rate: int,
+        channels_first: bool = False,
+        apply_fit: bool = False,
+        apply_predict: bool = True,
+        verbose: bool = False,
+        device_type: str = "gpu",
+    ):
+        """
+        Create an instance of MP3 compression.
+
+        :param sample_rate: Specifies the sampling rate of sample.
+        :param channels_first: Set channels first or last.
+        :param apply_fit: True if applied during fitting/training.
+        :param apply_predict: True if applied during predicting.
+        :param verbose: Show progress bars.
+        :param device_type: Type of device on which the classifier is run, either `gpu` or `cpu`.
+        """
+        import torch  # lgtm [py/repeated-import]
+        from torch.autograd import Function
+
+        super().__init__(is_fitted=True, apply_fit=apply_fit, apply_predict=apply_predict)
+        self.channels_first = channels_first
+        self.sample_rate = sample_rate
+        self.verbose = verbose
+        self._check_params()
+
+        # Set device
+        if device_type == "cpu" or not torch.cuda.is_available():
+            self._device = torch.device("cpu")
+        else:
+            cuda_idx = torch.cuda.current_device()
+            self._device = torch.device("cuda:{}".format(cuda_idx))
+
+        self.compression_numpy = Mp3Compression(
+            sample_rate=sample_rate,
+            channels_first=channels_first,
+            apply_fit=apply_fit,
+            apply_predict=apply_predict,
+            verbose=verbose,
+        )
+
+        class CompressionPyTorchNumpy(Function):
+            """
+            Function running Preprocessor.
+            """
+
+            @staticmethod
+            def forward(ctx, input):  # pylint: disable=W0622,W0221
+                numpy_input = input.detach().cpu().numpy()
+                result, _ = self.compression_numpy(numpy_input)
+                return input.new(result)
+
+            @staticmethod
+            def backward(ctx, grad_output):  # pylint: disable=W0221
+                numpy_go = grad_output.cpu().numpy()
+                np.expand_dims(input, axis=[0, 2])
+                result = self.compression_numpy.estimate_gradient(None, numpy_go)
+                result = result.squeeze()
+                return grad_output.new(result)
+
+        self._compression_pytorch_numpy = CompressionPyTorchNumpy
+
+    def forward(
+        self, x: "torch.Tensor", y: Optional["torch.Tensor"] = None
+    ) -> Tuple["torch.Tensor", Optional["torch.Tensor"]]:
+        """
+        Apply MP3 compression to sample `x`.
+
+        :param x: Sample to compress with shape `(length, channel)` or an array of sample arrays with shape
+                  (length,) or (length, channel).
+        :param y: Labels of the sample `x`. This function does not affect them in any way.
+        :return: Compressed sample.
+        """
+        x_compressed = self._compression_pytorch_numpy.apply(x)
+        return x_compressed, y
+
+    def _check_params(self) -> None:
+        if not (isinstance(self.sample_rate, (int, np.int)) and self.sample_rate > 0):
+            raise ValueError("Sample rate be must a positive integer.")
+
+        if not isinstance(self.verbose, bool):
+            raise ValueError("The argument `verbose` has to be of type bool.")
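A minimal usage sketch for the new wrapper (not part of the commit; it assumes torch, pydub and ffmpeg are available, and the batch shape and sample rate are illustrative). The wrapper runs the NumPy `Mp3Compression` defence inside a `torch.autograd.Function`, so it accepts and returns `torch.Tensor` inputs:

import numpy as np
import torch

from art.defences.preprocessor import Mp3CompressionPyTorch

# Illustrative batch: two one-second mono clips at 16 kHz, normalized to [-1, 1],
# with shape (batch_size, length, channel) as expected by the underlying defence.
x_np = np.random.uniform(-1.0, 1.0, size=(2, 16000, 1)).astype(np.float32)
x = torch.from_numpy(x_np)

defence = Mp3CompressionPyTorch(sample_rate=16000, channels_first=False, device_type="cpu")
x_compressed, _ = defence.forward(x)

print(x_compressed.shape, x_compressed.dtype)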
