Merge pull request #2419 from Trusted-AI/dependabot/pip/lief-0.14.1

beat-buesser · web-flow · commit 85b45aaf0f64 · 2024-03-22T22:01:14.000+01:00
Bump lief from 0.12.3 to 0.14.1
diff --git a/.github/workflows/ci-lingvo.yml b/.github/workflows/ci-lingvo.yml
@@ -50,7 +50,7 @@ jobs:
           sudo apt-get update
           sudo apt-get -y -q install ffmpeg libavcodec-extra
           python -m pip install --upgrade pip setuptools wheel
-          pip install -q -r <(sed '/^scipy/d;/^matplotlib/d;/^pandas/d;/^statsmodels/d;/^numba/d;/^jax/d;/^h5py/d;/^Pillow/d;/^pytest/d;/^pytest-mock/d;/^torch/d;/^torchaudio/d;/^torchvision/d;/^xgboost/d;/^requests/d;/^tensorflow/d;/^keras/d;/^kornia/d;/^librosa/d;/^tqdm/d;/^timm/d;/^catboost/d;/^scikit-learn/d;/^GPy/d' requirements_test.txt)
+          pip install -q -r <(sed '/^scipy/d;/^matplotlib/d;/^pandas/d;/^statsmodels/d;/^numba/d;/^jax/d;/^h5py/d;/^Pillow/d;/^pytest/d;/^pytest-mock/d;/^torch/d;/^torchaudio/d;/^torchvision/d;/^xgboost/d;/^requests/d;/^tensorflow/d;/^keras/d;/^kornia/d;/^librosa/d;/^tqdm/d;/^timm/d;/^catboost/d;/^scikit-learn/d;/^GPy/d;/^lief/d' requirements_test.txt)
           pip install scipy==1.5.4
           pip install matplotlib==3.3.4
           pip install pandas==1.1.5
@@ -77,6 +77,7 @@ jobs:
           pip install catboost==1.1.1
           pip install scikit-learn==0.24.2
           pip install GPy==1.10.0
+          pip install lief==0.12.3
           pip list
       - name: Run ${{ matrix.name }} Tests
         run: pytest --cov-report=xml --cov=art --cov-append -q -vv tests/estimators/speech_recognition/test_tensorflow_lingvo.py --framework=${{ matrix.framework }} --durations=0
diff --git a/.github/workflows/ci-tensorflow-v1.yml b/.github/workflows/ci-tensorflow-v1.yml
@@ -48,7 +48,7 @@ jobs:
           sudo apt-get update
           sudo apt-get -y -q install ffmpeg libavcodec-extra
           python -m pip install --upgrade pip setuptools wheel
-          pip install -q -r <(sed '/^pandas/d;/^scipy/d;/^matplotlib/d;/^xgboost/d;/^tensorflow/d;/^keras/d;/^jax/d;/^torch/d;/^Pillow/d;/^h5py/d;/^kornia/d;/^scikit-learn/d;/^pytest-mock/d;/^GPy/d' requirements_test.txt)
+          pip install -q -r <(sed '/^pandas/d;/^scipy/d;/^matplotlib/d;/^xgboost/d;/^tensorflow/d;/^keras/d;/^jax/d;/^torch/d;/^Pillow/d;/^h5py/d;/^kornia/d;/^scikit-learn/d;/^pytest-mock/d;/^GPy/d;/^lief/d' requirements_test.txt)
           pip install pandas==1.3.5
           pip install scipy==1.7.2
           pip install matplotlib==3.5.3
@@ -66,6 +66,7 @@ jobs:
           pip install scikit-learn==1.0.2
           pip install pytest-mock~=3.11.1
           pip install GPy~=1.10.0
+          pip install lief==0.12.3
           pip list
       - name: Run Tests
         run: ./run_tests.sh ${{ matrix.framework }}
diff --git a/art/attacks/evasion/pe_malware_attack.py b/art/attacks/evasion/pe_malware_attack.py
@@ -83,10 +83,10 @@ def __init__(
                           'input_dim': the number of discrete values, normally 257.
                           'embedding_size': size of the embedding layer. Default 8.
         :param num_of_iterations: The number of iterations to apply.
-        :param l_0: l_0 bound for the attack. If less then 1 it is interpreted as a fraction of the file size.
+        :param l_0: l_0 bound for the attack. If less than 1 it is interpreted as a fraction of the file size.
                     If larger than 1 it is interpreted as the total number of permissible features to change.
         :param l_r: Learning rate for the optimisation
-        :param use_sign: If we want to use the sign of the gradient, rather then the gradient itself.
+        :param use_sign: If we want to use the sign of the gradient, rather than the gradient itself.
         :param verbose: Show progress bars.
         """
         super().__init__(estimator=classifier)
@@ -333,7 +333,7 @@ def pull_out_adversarial_malware(
 
         :param x: Batch of data which will contain a mix of adversarial examples and unperturbed data.
         :param y: Labels indicating which are valid adversarial examples or not.
-        :param initial_dtype: Data can be given in a few formats (uin16, float, etc) so use initial_dtype
+        :param initial_dtype: Data can be given in a few formats (uint16, float, etc.) so use initial_dtype
                               to make the returned sample match the original.
         :param sample_sizes: Size of the original data files
         :param input_perturb_sizes: List of length batch size, each element is in itself a list containing
@@ -346,7 +346,7 @@ def pull_out_adversarial_malware(
         """
         num_of_malware_samples = int(np.sum(y))
 
-        # make array and allocate, much faster then appending to list and converting
+        # make array and allocate, much faster than appending to list and converting
         adv_x = np.zeros((num_of_malware_samples, x.shape[1]), dtype=initial_dtype)
         adv_y = np.ones((num_of_malware_samples, 1))
 
@@ -475,7 +475,7 @@ def generate(  # pylint: disable=W0221
     ) -> np.ndarray:
         """
         Generates the adversarial examples. x needs to be composed of valid files by default which can support the
-        adversarial perturbation and so are malicious and can support the assigned L0 budget. They can obtained by
+        adversarial perturbation and so are malicious and can support the assigned L0 budget. They can be obtained by
         using `pull_out_valid_samples` on the data.
 
         This check on the input data can be over-ridden by toggling the flag verify_input_data
@@ -488,7 +488,7 @@ def generate(  # pylint: disable=W0221
         After all the regions marked in perturb_sizes and perturb_starts have been assigned, if automatically_append is
         set to true and l0 budget remains, the extra perturbation is added at the end in an append-style attack.
 
-        :param x: A array with input data.
+        :param x: An array with input data.
         :param y: (N, 1) binary labels to make sure the benign files are zero masked.
         :param sample_sizes: The size of the original file, before it was padded to the input size required by MalConv
         :param automatically_append: Whether to automatically append extra spare perturbation at the end of the file.
@@ -564,7 +564,7 @@ def generate(  # pylint: disable=W0221
         for _ in trange(self.num_of_iterations, desc="PE Adv. Malware", disable=not self.verbose):
             gradients = self.estimator.class_gradient(embeddings, label=0)
             # go from (bsize x 1 x features x embedding size) -> (bsize x features x embedding size) in a
-            # framework agnostic manner.
+            # framework-agnostic manner.
             gradients = gradients[:, 0, :, :]
             gradients = -1 * gradients
             embeddings = self.update_embeddings(embeddings, gradients, mask)
@@ -590,7 +590,7 @@ def process_file(
         Go from raw file to numpy array.
 
         :param filepath: Path to the file we convert to a numpy array
-        :param padding_char: The char to use to pad the input if it is shorter then maxlen
+        :param padding_char: The char to use to pad the input if it is shorter than maxlen
         :param maxlen: Maximum size of the file processed by the model. Currently set to 1MB
         :return data: A numpy array of the PE file
         :return size_of_original_file: Size of the PE file
@@ -626,17 +626,22 @@ def get_peinfo(
 
         cleaned_dump = {}
 
-        binary = lief.parse(filepath)  # pylint: disable=I1101
+        binary_load = lief.parse(filepath)  # pylint: disable=I1101
+        if binary_load is not None:
+            binary = binary_load
+        else:
+            raise ValueError("Failed to load binary.")
+
         for section in binary.sections:
             section_info = {}
-            slack = section.sizeof_raw_data - section.virtual_size
-            section_info["PointerToRawData"] = section.pointerto_raw_data
-            section_info["VirtualAddress"] = section.virtual_size
-            section_info["SizeOfRawData"] = section.sizeof_raw_data
+            slack = section.sizeof_raw_data - section.virtual_size  # type: ignore
+            section_info["PointerToRawData"] = section.pointerto_raw_data  # type: ignore
+            section_info["VirtualAddress"] = section.virtual_size  # type: ignore
+            section_info["SizeOfRawData"] = section.sizeof_raw_data  # type: ignore
             cleaned_dump[section.name] = section_info
             if slack > 0:
                 size_of_slack.append(slack)
-                start_of_slack.append(section.pointerto_raw_data + section.virtual_size)
+                start_of_slack.append(section.pointerto_raw_data + section.virtual_size)  # type: ignore
 
         if save_to_json_path is not None:
             with open(save_to_json_path, "w", encoding="utf8") as outfile:
@@ -675,7 +680,7 @@ def insert_section(
         :param bytes_to_assign: (Optional) how many bytes we wish to specify when inserting a new section.
                                 If unspecified the whole l0 budget will be used on a single section.
         :param verbose: lief outputs a lot to the console, particularly if we are processing many files.
-                        By default suppress printing of messages. Can be toggled on/off by True/False
+                        By default, suppress printing of messages. Can be toggled on/off by True/False
         :return manipulated_data: Executable with section inserted and turned into a numpy array of
                                   the appropriate size
         :return len(manipulated_file): Size of original file
@@ -690,7 +695,11 @@ def insert_section(
         if not verbose:
             lief.logging.disable()
 
-        binary = lief.PE.parse(datapoint)
+        binary_parse = lief.PE.parse(datapoint)
+        if binary_parse is not None:
+            binary = binary_parse
+        else:
+            raise ValueError("Failed to load binary.")
 
         name_in_use = True
         while name_in_use:
@@ -705,8 +714,8 @@ def insert_section(
         new_section = lief.PE.Section(new_section_name)
 
         if bytes_to_assign is None:
-            if self.l_0 < 1:  # l0 is a fraction of the filesize
-                # if its a filepath we need to get the file size
+            if self.l_0 < 1:  # l0 is a fraction of the file size
+                # if it's a filepath we need to get the file size
                 if isinstance(datapoint, str):
                     with open(datapoint, "rb") as file:
                         open_file = file.read()
@@ -721,9 +730,9 @@ def insert_section(
                 perturbation_size = int(sample_size * self.l_0)
             else:  # or l0 is interpreted as total perturbation size
                 perturbation_size = int(self.l_0)
-            new_section.content = [random.randint(0, 255) for _ in range(perturbation_size)]
+            new_section.content = [random.randint(0, 255) for _ in range(perturbation_size)]  # type: ignore
         else:
-            new_section.content = [random.randint(0, 255) for _ in range(bytes_to_assign)]
+            new_section.content = [random.randint(0, 255) for _ in range(bytes_to_assign)]  # type: ignore
 
         # we add the new section at the end of the existing sections
         section_end_points = []
@@ -766,7 +775,7 @@ def insert_section(
 
         manipulated_data = np.ones((maxlen,), dtype=np.uint16) * padding_char
 
-        # Only process files which are less then the max file size supported
+        # Only process files which are less than the max file size supported
         if len(manipulated_file) < maxlen:
             manipulated_data[: len(manipulated_file)] = manipulated_file[:maxlen]
 
diff --git a/requirements_test.txt b/requirements_test.txt
@@ -45,7 +45,7 @@ xgboost==2.0.3
 
 kornia~=0.7.1
 tensorboardX==2.6.2.2
-lief==0.12.3
+lief==0.14.1
 jax[cpu]==0.4.23
 
 # Lingvo ASR dependencies