@@ -83,10 +83,10 @@ def __init__(
    'input_dim': the number of discrete values, normally 257.
    'embedding_size': size of the embedding layer. Default 8.
:param num_of_iterations: The number of iterations to apply.
- :param l_0: l_0 bound for the attack. If less then 1 it is interpreted as a fraction of the file size.
+ :param l_0: l_0 bound for the attack. If less than 1 it is interpreted as a fraction of the file size.
    If larger than 1 it is interpreted as the total number of permissible features to change.
:param l_r: Learning rate for the optimisation
- :param use_sign: If we want to use the sign of the gradient, rather then the gradient itself.
+ :param use_sign: If we want to use the sign of the gradient, rather than the gradient itself.
:param verbose: Show progress bars.
"""
super().__init__(estimator=classifier)
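The constructor documented above only fixes the attack hyper-parameters. A minimal instantiation sketch follows; the class name and the `classifier` object are assumptions for illustration (they are not shown in this hunk), and only the keyword names come from the docstring:

# Hypothetical usage sketch: class name and classifier are assumed, parameters follow the docstring above.
attack = MalwareGDTensorFlow(   # assumed class name
    classifier=classifier,      # a byte-level, MalConv-style TensorFlow estimator
    l_0=0.1,                    # below 1: perturb at most 10% of each file's bytes
    l_r=0.01,                   # learning rate for the optimisation
    num_of_iterations=10,
    use_sign=True,              # step with the sign of the gradient rather than its raw value
    verbose=True,
)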
@@ -333,7 +333,7 @@ def pull_out_adversarial_malware(
:param x: Batch of data which will contain a mix of adversarial examples and unperturbed data.
:param y: Labels indicating which are valid adversarial examples or not.
- :param initial_dtype: Data can be given in a few formats (uin16, float, etc) so use initial_dtype
+ :param initial_dtype: Data can be given in a few formats (uint16, float, etc.) so use initial_dtype
    to make the returned sample match the original.
:param sample_sizes: Size of the original data files
:param input_perturb_sizes: List of length batch size, each element is in itself a list containing
@@ -346,7 +346,7 @@ def pull_out_adversarial_malware(
"""
num_of_malware_samples = int(np.sum(y))

- # make array and allocate, much faster then appending to list and converting
+ # make array and allocate, much faster than appending to list and converting
adv_x = np.zeros((num_of_malware_samples, x.shape[1]), dtype=initial_dtype)
adv_y = np.ones((num_of_malware_samples, 1))
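The comment in this hunk points at a general numpy pattern: allocate the output array once and fill it in place, rather than appending to a Python list and converting afterwards. A standalone sketch of that pattern, independent of this class:

import numpy as np

# Pre-allocate, then fill row by row; avoids repeated list growth plus a final conversion.
num_samples, num_features = 100, 1024
adv_x = np.zeros((num_samples, num_features), dtype=np.uint16)
adv_y = np.ones((num_samples, 1))
for i in range(num_samples):
    adv_x[i, :] = i % 256  # placeholder payload; the real code copies each perturbed sample here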
@@ -475,7 +475,7 @@ def generate( # pylint: disable=W0221
) -> np.ndarray:
"""
Generates the adversarial examples. x needs to be composed of valid files by default which can support the
- adversarial perturbation and so are malicious and can support the assigned L0 budget. They can obtained by
+ adversarial perturbation and so are malicious and can support the assigned L0 budget. They can be obtained by
using `pull_out_valid_samples` on the data.

This check on the input data can be over-ridden by toggling the flag verify_input_data
@@ -488,7 +488,7 @@ def generate( # pylint: disable=W0221
After all the regions marked in perturb_sizes and perturb_starts have been assigned and automatically_append is
set to true and remaining l0 perturbation the extra perturbation is added at the end in an append style attack.

- :param x: A array with input data.
+ :param x: An array with input data.
:param y: (N, 1) binary labels to make sure the benign files are zero masked.
:param sample_sizes: The size of the original file, before it was padded to the input size required by MalConv
:param automatically_append: Whether to automatically append extra spare perturbation at the end of the file.
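A hedged sketch of how `generate` might be called, using only the parameter names documented above; the `attack` object and the data variables are placeholders, not taken from this diff:

# Hypothetical call sketch; only the keyword names come from the docstring.
adv_x = attack.generate(
    x=x,                        # padded byte arrays, one row per file
    y=y,                        # (N, 1) binary labels; benign rows stay zero-masked
    sample_sizes=sample_sizes,  # original (unpadded) file sizes
    automatically_append=True,  # spend any leftover l_0 budget as an appended tail
)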
@@ -564,7 +564,7 @@ def generate( # pylint: disable=W0221
for _ in trange(self.num_of_iterations, desc="PE Adv. Malware", disable=not self.verbose):
    gradients = self.estimator.class_gradient(embeddings, label=0)
    # go from (bsize x 1 x features x embedding size) -> (bsize x features x embedding size) in a
-     # framework agnostic manner.
+     # framework-agnostic manner.
    gradients = gradients[:, 0, :, :]
    gradients = -1 * gradients
    embeddings = self.update_embeddings(embeddings, gradients, mask)
@@ -590,7 +590,7 @@ def process_file(
Go from raw file to numpy array.

:param filepath: Path to the file we convert to a numpy array
- :param padding_char: The char to use to pad the input if it is shorter then maxlen
+ :param padding_char: The char to use to pad the input if it is shorter than maxlen
:param maxlen: Maximum size of the file processed by the model. Currently set to 1MB
:return data: A numpy array of the PE file
:return size_of_original_file: Size of the PE file
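The docstring above describes a standard read-and-pad conversion. A self-contained sketch of that idea; the padding value of 256 and the 2**20 (1MB) maximum are assumptions for illustration, not taken from this diff:

import numpy as np

def file_to_padded_array(filepath: str, padding_char: int = 256, maxlen: int = 2**20):
    """Read a file as raw bytes and pad it to maxlen with padding_char."""
    with open(filepath, "rb") as file_handle:
        raw_bytes = np.frombuffer(file_handle.read(), dtype=np.uint8)
    size_of_original_file = len(raw_bytes)
    data = np.full((maxlen,), padding_char, dtype=np.uint16)  # uint16 so the 256 pad value fits
    data[: min(size_of_original_file, maxlen)] = raw_bytes[:maxlen]
    return data, size_of_original_file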
@@ -626,17 +626,22 @@ def get_peinfo(
cleaned_dump = {}

- binary = lief.parse(filepath)  # pylint: disable=I1101
+ binary_load = lief.parse(filepath)  # pylint: disable=I1101
+ if binary_load is not None:
+     binary = binary_load
+ else:
+     raise ValueError("Failed to load binary.")
+
for section in binary.sections:
    section_info = {}
-     slack = section.sizeof_raw_data - section.virtual_size
-     section_info["PointerToRawData"] = section.pointerto_raw_data
-     section_info["VirtualAddress"] = section.virtual_size
-     section_info["SizeOfRawData"] = section.sizeof_raw_data
+     slack = section.sizeof_raw_data - section.virtual_size  # type: ignore
+     section_info["PointerToRawData"] = section.pointerto_raw_data  # type: ignore
+     section_info["VirtualAddress"] = section.virtual_size  # type: ignore
+     section_info["SizeOfRawData"] = section.sizeof_raw_data  # type: ignore
    cleaned_dump[section.name] = section_info
    if slack > 0:
        size_of_slack.append(slack)
-         start_of_slack.append(section.pointerto_raw_data + section.virtual_size)
+         start_of_slack.append(section.pointerto_raw_data + section.virtual_size)  # type: ignore

if save_to_json_path is not None:
    with open(save_to_json_path, "w", encoding="utf8") as outfile:
@@ -675,7 +680,7 @@ def insert_section(
:param bytes_to_assign: (Optional) how many bytes we wish to specify when inserting a new section.
    If unspecified the whole l0 budget will be used on a single section.
:param verbose: lief outputs a lot to the console, particularly if we are processing many files.
- By default suppress printing of messages. Can be toggled on/off by True/False
+ By default, suppress printing of messages. Can be toggled on/off by True/False
:return manipulated_data: Executable with section inserted and turned into a numpy array of
    the appropriate size
:return len(manipulated_file): Size of original file
@@ -690,7 +695,11 @@ def insert_section(
if not verbose:
    lief.logging.disable()

- binary = lief.PE.parse(datapoint)
+ binary_parse = lief.PE.parse(datapoint)
+ if binary_parse is not None:
+     binary = binary_parse
+ else:
+     raise ValueError("Failed to load binary.")

name_in_use = True
while name_in_use:
@@ -705,8 +714,8 @@ def insert_section(
new_section = lief.PE.Section(new_section_name)

if bytes_to_assign is None:
-     if self.l_0 < 1:  # l0 is a fraction of the filesize
-         # if its a filepath we need to get the file size
+     if self.l_0 < 1:  # l0 is a fraction of the file size
+         # if it's a filepath we need to get the file size
        if isinstance(datapoint, str):
            with open(datapoint, "rb") as file:
                open_file = file.read()
@@ -721,9 +730,9 @@ def insert_section(
        perturbation_size = int(sample_size * self.l_0)
    else:  # or l0 is interpreted as total perturbation size
        perturbation_size = int(self.l_0)
-     new_section.content = [random.randint(0, 255) for _ in range(perturbation_size)]
+     new_section.content = [random.randint(0, 255) for _ in range(perturbation_size)]  # type: ignore
else:
-     new_section.content = [random.randint(0, 255) for _ in range(bytes_to_assign)]
+     new_section.content = [random.randint(0, 255) for _ in range(bytes_to_assign)]  # type: ignore

# we add the new section at the end of the existing sections
section_end_points = []
@@ -766,7 +775,7 @@ def insert_section(
manipulated_data = np.ones((maxlen,), dtype=np.uint16) * padding_char

- # Only process files which are less then the max file size supported
+ # Only process files which are less than the max file size supported
if len(manipulated_file) < maxlen:
    manipulated_data[: len(manipulated_file)] = manipulated_file[:maxlen]