Skip to content

Commit 3f0f000

Browse files
committed
Fix mypy warnings
Signed-off-by: Beat Buesser <[email protected]>
1 parent ecd8200 commit 3f0f000

File tree

1 file changed

+29
-21
lines changed

1 file changed

+29
-21
lines changed

art/attacks/evasion/pe_malware_attack.py

Lines changed: 29 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,10 @@ def __init__(
8383
'input_dim': the number of discrete values, normally 257.
8484
'embedding_size': size of the embedding layer. Default 8.
8585
:param num_of_iterations: The number of iterations to apply.
86-
:param l_0: l_0 bound for the attack. If less then 1 it is interpreted as a fraction of the file size.
86+
:param l_0: l_0 bound for the attack. If less than 1 it is interpreted as a fraction of the file size.
8787
If larger than 1 it is interpreted as the total number of permissible features to change.
8888
:param l_r: Learning rate for the optimisation
89-
:param use_sign: If we want to use the sign of the gradient, rather then the gradient itself.
89+
:param use_sign: If we want to use the sign of the gradient, rather than the gradient itself.
9090
:param verbose: Show progress bars.
9191
"""
9292
super().__init__(estimator=classifier)
@@ -333,7 +333,7 @@ def pull_out_adversarial_malware(
333333
334334
:param x: Batch of data which will contain a mix of adversarial examples and unperturbed data.
335335
:param y: Labels indicating which are valid adversarial examples or not.
336-
:param initial_dtype: Data can be given in a few formats (uin16, float, etc) so use initial_dtype
336+
:param initial_dtype: Data can be given in a few formats (uint16, float, etc.) so use initial_dtype
337337
to make the returned sample match the original.
338338
:param sample_sizes: Size of the original data files
339339
:param input_perturb_sizes: List of length batch size, each element is in itself a list containing
@@ -346,7 +346,7 @@ def pull_out_adversarial_malware(
346346
"""
347347
num_of_malware_samples = int(np.sum(y))
348348

349-
# make array and allocate, much faster then appending to list and converting
349+
# make array and allocate, much faster than appending to list and converting
350350
adv_x = np.zeros((num_of_malware_samples, x.shape[1]), dtype=initial_dtype)
351351
adv_y = np.ones((num_of_malware_samples, 1))
352352

@@ -475,7 +475,7 @@ def generate( # pylint: disable=W0221
475475
) -> np.ndarray:
476476
"""
477477
Generates the adversarial examples. x needs to be composed of valid files by default which can support the
478-
adversarial perturbation and so are malicious and can support the assigned L0 budget. They can obtained by
478+
adversarial perturbation and so are malicious and can support the assigned L0 budget. They can be obtained by
479479
using `pull_out_valid_samples` on the data.
480480
481481
This check on the input data can be overridden by toggling the flag verify_input_data
@@ -488,7 +488,7 @@ def generate( # pylint: disable=W0221
488488
After all the regions marked in perturb_sizes and perturb_starts have been assigned and automatically_append is
489489
set to true, any remaining l0 perturbation is added at the end in an append-style attack.
490490
491-
:param x: A array with input data.
491+
:param x: An array with input data.
492492
:param y: (N, 1) binary labels to make sure the benign files are zero masked.
493493
:param sample_sizes: The size of the original file, before it was padded to the input size required by MalConv
494494
:param automatically_append: Whether to automatically append extra spare perturbation at the end of the file.
@@ -564,7 +564,7 @@ def generate( # pylint: disable=W0221
564564
for _ in trange(self.num_of_iterations, desc="PE Adv. Malware", disable=not self.verbose):
565565
gradients = self.estimator.class_gradient(embeddings, label=0)
566566
# go from (bsize x 1 x features x embedding size) -> (bsize x features x embedding size) in a
567-
# framework agnostic manner.
567+
# framework-agnostic manner.
568568
gradients = gradients[:, 0, :, :]
569569
gradients = -1 * gradients
570570
embeddings = self.update_embeddings(embeddings, gradients, mask)
@@ -590,7 +590,7 @@ def process_file(
590590
Go from raw file to numpy array.
591591
592592
:param filepath: Path to the file we convert to a numpy array
593-
:param padding_char: The char to use to pad the input if it is shorter then maxlen
593+
:param padding_char: The char to use to pad the input if it is shorter than maxlen
594594
:param maxlen: Maximum size of the file processed by the model. Currently set to 1MB
595595
:return data: A numpy array of the PE file
596596
:return size_of_original_file: Size of the PE file
@@ -626,17 +626,21 @@ def get_peinfo(
626626

627627
cleaned_dump = {}
628628

629-
binary = lief.parse(filepath) # pylint: disable=I1101
629+
binary_load = lief.parse(filepath) # pylint: disable=I1101
630+
if binary_load is None:
631+
raise ValueError("Failed to load binary.")
632+
else:
633+
binary = binary_load
630634
for section in binary.sections:
631635
section_info = {}
632-
slack = section.sizeof_raw_data - section.virtual_size
633-
section_info["PointerToRawData"] = section.pointerto_raw_data
634-
section_info["VirtualAddress"] = section.virtual_size
635-
section_info["SizeOfRawData"] = section.sizeof_raw_data
636+
slack = section.sizeof_raw_data - section.virtual_size # type: ignore
637+
section_info["PointerToRawData"] = section.pointerto_raw_data # type: ignore
638+
section_info["VirtualAddress"] = section.virtual_size # type: ignore
639+
section_info["SizeOfRawData"] = section.sizeof_raw_data # type: ignore
636640
cleaned_dump[section.name] = section_info
637641
if slack > 0:
638642
size_of_slack.append(slack)
639-
start_of_slack.append(section.pointerto_raw_data + section.virtual_size)
643+
start_of_slack.append(section.pointerto_raw_data + section.virtual_size) # type: ignore
640644

641645
if save_to_json_path is not None:
642646
with open(save_to_json_path, "w", encoding="utf8") as outfile:
@@ -675,7 +679,7 @@ def insert_section(
675679
:param bytes_to_assign: (Optional) how many bytes we wish to specify when inserting a new section.
676680
If unspecified the whole l0 budget will be used on a single section.
677681
:param verbose: lief outputs a lot to the console, particularly if we are processing many files.
678-
By default suppress printing of messages. Can be toggled on/off by True/False
682+
By default, suppress printing of messages. Can be toggled on/off by True/False
679683
:return manipulated_data: Executable with section inserted and turned into a numpy array of
680684
the appropriate size
681685
:return len(manipulated_file): Size of original file
@@ -690,7 +694,11 @@ def insert_section(
690694
if not verbose:
691695
lief.logging.disable()
692696

693-
binary = lief.PE.parse(datapoint)
697+
binary_parse = lief.PE.parse(datapoint)
698+
if binary_parse is None:
699+
raise ValueError("Failed to load binary.")
700+
else:
701+
binary = binary_parse
694702

695703
name_in_use = True
696704
while name_in_use:
@@ -705,8 +713,8 @@ def insert_section(
705713
new_section = lief.PE.Section(new_section_name)
706714

707715
if bytes_to_assign is None:
708-
if self.l_0 < 1: # l0 is a fraction of the filesize
709-
# if its a filepath we need to get the file size
716+
if self.l_0 < 1: # l0 is a fraction of the file size
717+
# if it's a filepath we need to get the file size
710718
if isinstance(datapoint, str):
711719
with open(datapoint, "rb") as file:
712720
open_file = file.read()
@@ -721,9 +729,9 @@ def insert_section(
721729
perturbation_size = int(sample_size * self.l_0)
722730
else: # or l0 is interpreted as total perturbation size
723731
perturbation_size = int(self.l_0)
724-
new_section.content = [random.randint(0, 255) for _ in range(perturbation_size)]
732+
new_section.content = [random.randint(0, 255) for _ in range(perturbation_size)] # type: ignore
725733
else:
726-
new_section.content = [random.randint(0, 255) for _ in range(bytes_to_assign)]
734+
new_section.content = [random.randint(0, 255) for _ in range(bytes_to_assign)] # type: ignore
727735

728736
# we add the new section at the end of the existing sections
729737
section_end_points = []
@@ -766,7 +774,7 @@ def insert_section(
766774

767775
manipulated_data = np.ones((maxlen,), dtype=np.uint16) * padding_char
768776

769-
# Only process files which are less then the max file size supported
777+
# Only process files which are less than the max file size supported
770778
if len(manipulated_file) < maxlen:
771779
manipulated_data[: len(manipulated_file)] = manipulated_file[:maxlen]
772780

0 commit comments

Comments
 (0)