@@ -83,10 +83,10 @@ def __init__(
    'input_dim': the number of discrete values, normally 257.
    'embedding_size': size of the embedding layer. Default 8.
:param num_of_iterations: The number of iterations to apply.
- :param l_0: l_0 bound for the attack. If less then 1 it is interpreted as a fraction of the file size.
+ :param l_0: l_0 bound for the attack. If less than 1 it is interpreted as a fraction of the file size.
    If larger than 1 it is interpreted as the total number of permissible features to change.
:param l_r: Learning rate for the optimisation
- :param use_sign: If we want to use the sign of the gradient, rather then the gradient itself.
+ :param use_sign: If we want to use the sign of the gradient, rather than the gradient itself.
:param verbose: Show progress bars.
"""
super().__init__(estimator=classifier)
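The constructor documented above only fixes the attack hyper-parameters. A minimal instantiation sketch follows; the class name and the `classifier` object are assumptions for illustration (they are not shown in this hunk), and only the keyword names come from the docstring:

# Hypothetical usage sketch: class name and classifier are assumed, parameters follow the docstring above.
attack = MalwareGDTensorFlow(   # assumed class name
    classifier=classifier,      # a byte-level, MalConv-style TensorFlow estimator
    l_0=0.1,                    # below 1: perturb at most 10% of each file's bytes
    l_r=0.01,                   # learning rate for the optimisation
    num_of_iterations=10,
    use_sign=True,              # step with the sign of the gradient rather than its raw value
    verbose=True,
)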
@@ -333,7 +333,7 @@ def pull_out_adversarial_malware(
:param x: Batch of data which will contain a mix of adversarial examples and unperturbed data.
:param y: Labels indicating which are valid adversarial examples or not.
- :param initial_dtype: Data can be given in a few formats (uin16, float, etc) so use initial_dtype
+ :param initial_dtype: Data can be given in a few formats (uint16, float, etc.) so use initial_dtype
    to make the returned sample match the original.
:param sample_sizes: Size of the original data files
:param input_perturb_sizes: List of length batch size, each element is in itself a list containing
@@ -346,7 +346,7 @@ def pull_out_adversarial_malware(
"""
num_of_malware_samples = int(np.sum(y))

- # make array and allocate, much faster then appending to list and converting
+ # make array and allocate, much faster than appending to list and converting
adv_x = np.zeros((num_of_malware_samples, x.shape[1]), dtype=initial_dtype)
adv_y = np.ones((num_of_malware_samples, 1))
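The comment in this hunk points at a general numpy pattern: allocate the output array once and fill it in place, rather than appending to a Python list and converting afterwards. A standalone sketch of that pattern, independent of this class:

import numpy as np

# Pre-allocate, then fill row by row; avoids repeated list growth plus a final conversion.
num_samples, num_features = 100, 1024
adv_x = np.zeros((num_samples, num_features), dtype=np.uint16)
adv_y = np.ones((num_samples, 1))
for i in range(num_samples):
    adv_x[i, :] = i % 256  # placeholder payload; the real code copies each perturbed sample here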
@@ -475,7 +475,7 @@ def generate( # pylint: disable=W0221
) -> np.ndarray:
"""
Generates the adversarial examples. x needs to be composed of valid files by default which can support the
- adversarial perturbation and so are malicious and can support the assigned L0 budget. They can obtained by
+ adversarial perturbation and so are malicious and can support the assigned L0 budget. They can be obtained by
using `pull_out_valid_samples` on the data.

This check on the input data can be over-ridden by toggling the flag verify_input_data
@@ -488,7 +488,7 @@ def generate( # pylint: disable=W0221
After all the regions marked in perturb_sizes and perturb_starts have been assigned and automatically_append is
set to true and remaining l0 perturbation the extra perturbation is added at the end in an append style attack.

- :param x: A array with input data.
+ :param x: An array with input data.
:param y: (N, 1) binary labels to make sure the benign files are zero masked.
:param sample_sizes: The size of the original file, before it was padded to the input size required by MalConv
:param automatically_append: Whether to automatically append extra spare perturbation at the end of the file.
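A hedged sketch of how `generate` might be called, using only the parameter names documented above; the `attack` object and the data variables are placeholders, not taken from this diff:

# Hypothetical call sketch; only the keyword names come from the docstring.
adv_x = attack.generate(
    x=x,                        # padded byte arrays, one row per file
    y=y,                        # (N, 1) binary labels; benign rows stay zero-masked
    sample_sizes=sample_sizes,  # original (unpadded) file sizes
    automatically_append=True,  # spend any leftover l_0 budget as an appended tail
)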
@@ -564,7 +564,7 @@ def generate( # pylint: disable=W0221
for _ in trange(self.num_of_iterations, desc="PE Adv. Malware", disable=not self.verbose):
    gradients = self.estimator.class_gradient(embeddings, label=0)
    # go from (bsize x 1 x features x embedding size) -> (bsize x features x embedding size) in a
-     # framework agnostic manner.
+     # framework-agnostic manner.
    gradients = gradients[:, 0, :, :]
    gradients = -1 * gradients
    embeddings = self.update_embeddings(embeddings, gradients, mask)
@@ -590,7 +590,7 @@ def process_file(
Go from raw file to numpy array.

:param filepath: Path to the file we convert to a numpy array
- :param padding_char: The char to use to pad the input if it is shorter then maxlen
+ :param padding_char: The char to use to pad the input if it is shorter than maxlen
:param maxlen: Maximum size of the file processed by the model. Currently set to 1MB
:return data: A numpy array of the PE file
:return size_of_original_file: Size of the PE file
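The docstring above describes a standard read-and-pad conversion. A self-contained sketch of that idea; the padding value of 256 and the 2**20 (1MB) maximum are assumptions for illustration, not taken from this diff:

import numpy as np

def file_to_padded_array(filepath: str, padding_char: int = 256, maxlen: int = 2**20):
    """Read a file as raw bytes and pad it to maxlen with padding_char."""
    with open(filepath, "rb") as file_handle:
        raw_bytes = np.frombuffer(file_handle.read(), dtype=np.uint8)
    size_of_original_file = len(raw_bytes)
    data = np.full((maxlen,), padding_char, dtype=np.uint16)  # uint16 so the 256 pad value fits
    data[: min(size_of_original_file, maxlen)] = raw_bytes[:maxlen]
    return data, size_of_original_file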
@@ -626,17 +626,22 @@ def get_peinfo(
cleaned_dump = {}

- binary = lief.parse(filepath)  # pylint: disable=I1101
+ binary_load = lief.parse(filepath)  # pylint: disable=I1101
+ if binary_load is not None:
+     binary = binary_load
+ else:
+     raise ValueError("Failed to load binary.")
+
for section in binary.sections:
    section_info = {}
-     slack = section.sizeof_raw_data - section.virtual_size
-     section_info["PointerToRawData"] = section.pointerto_raw_data
-     section_info["VirtualAddress"] = section.virtual_size
-     section_info["SizeOfRawData"] = section.sizeof_raw_data
+     slack = section.sizeof_raw_data - section.virtual_size  # type: ignore
+     section_info["PointerToRawData"] = section.pointerto_raw_data  # type: ignore
+     section_info["VirtualAddress"] = section.virtual_size  # type: ignore
+     section_info["SizeOfRawData"] = section.sizeof_raw_data  # type: ignore
    cleaned_dump[section.name] = section_info
    if slack > 0:
        size_of_slack.append(slack)
-         start_of_slack.append(section.pointerto_raw_data + section.virtual_size)
+         start_of_slack.append(section.pointerto_raw_data + section.virtual_size)  # type: ignore

if save_to_json_path is not None:
    with open(save_to_json_path, "w", encoding="utf8") as outfile:
@@ -675,7 +680,7 @@ def insert_section(
:param bytes_to_assign: (Optional) how many bytes we wish to specify when inserting a new section.
    If unspecified the whole l0 budget will be used on a single section.
:param verbose: lief outputs a lot to the console, particularly if we are processing many files.
- By default suppress printing of messages. Can be toggled on/off by True/False
+ By default, suppress printing of messages. Can be toggled on/off by True/False
:return manipulated_data: Executable with section inserted and turned into a numpy array of
    the appropriate size
:return len(manipulated_file): Size of original file
@@ -690,7 +695,11 @@ def insert_section(
if not verbose:
    lief.logging.disable()

- binary = lief.PE.parse(datapoint)
+ binary_parse = lief.PE.parse(datapoint)
+ if binary_parse is not None:
+     binary = binary_parse
+ else:
+     raise ValueError("Failed to load binary.")

name_in_use = True
while name_in_use:
@@ -705,8 +714,8 @@ def insert_section(
new_section = lief.PE.Section(new_section_name)

if bytes_to_assign is None:
-     if self.l_0 < 1:  # l0 is a fraction of the filesize
-         # if its a filepath we need to get the file size
+     if self.l_0 < 1:  # l0 is a fraction of the file size
+         # if it's a filepath we need to get the file size
        if isinstance(datapoint, str):
            with open(datapoint, "rb") as file:
                open_file = file.read()
@@ -721,9 +730,9 @@ def insert_section(
        perturbation_size = int(sample_size * self.l_0)
    else:  # or l0 is interpreted as total perturbation size
        perturbation_size = int(self.l_0)
-     new_section.content = [random.randint(0, 255) for _ in range(perturbation_size)]
+     new_section.content = [random.randint(0, 255) for _ in range(perturbation_size)]  # type: ignore
else:
-     new_section.content = [random.randint(0, 255) for _ in range(bytes_to_assign)]
+     new_section.content = [random.randint(0, 255) for _ in range(bytes_to_assign)]  # type: ignore

# we add the new section at the end of the existing sections
section_end_points = []
@@ -766,7 +775,7 @@ def insert_section(
manipulated_data = np.ones((maxlen,), dtype=np.uint16) * padding_char

- # Only process files which are less then the max file size supported
+ # Only process files which are less than the max file size supported
if len(manipulated_file) < maxlen:
    manipulated_data[: len(manipulated_file)] = manipulated_file[:maxlen]