@@ -83,10 +83,10 @@ def __init__(
8383 'input_dim': the number of discrete values, normally 257.
8484 'embedding_size': size of the embedding layer. Default 8.
8585 :param num_of_iterations: The number of iterations to apply.
86- :param l_0: l_0 bound for the attack. If less then 1 it is interpreted as a fraction of the file size.
86+ :param l_0: l_0 bound for the attack. If less than 1 it is interpreted as a fraction of the file size.
8787 If larger than 1 it is interpreted as the total number of permissible features to change.
8888 :param l_r: Learning rate for the optimisation
89- :param use_sign: If we want to use the sign of the gradient, rather then the gradient itself.
89+ :param use_sign: If we want to use the sign of the gradient, rather than the gradient itself.
9090 :param verbose: Show progress bars.
9191 """
9292 super ().__init__ (estimator = classifier )
@@ -333,7 +333,7 @@ def pull_out_adversarial_malware(
333333
334334 :param x: Batch of data which will contain a mix of adversarial examples and unperturbed data.
335335 :param y: Labels indicating which are valid adversarial examples or not.
336- :param initial_dtype: Data can be given in a few formats (uin16, float, etc) so use initial_dtype
336+ :param initial_dtype: Data can be given in a few formats (uint16, float, etc.) so use initial_dtype
337337 to make the returned sample match the original.
338338 :param sample_sizes: Size of the original data files
339339 :param input_perturb_sizes: List of length batch size, each element is in itself a list containing
@@ -346,7 +346,7 @@ def pull_out_adversarial_malware(
346346 """
347347 num_of_malware_samples = int (np .sum (y ))
348348
349- # make array and allocate, much faster then appending to list and converting
349+ # make array and allocate, much faster than appending to list and converting
350350 adv_x = np .zeros ((num_of_malware_samples , x .shape [1 ]), dtype = initial_dtype )
351351 adv_y = np .ones ((num_of_malware_samples , 1 ))
352352
@@ -475,7 +475,7 @@ def generate( # pylint: disable=W0221
475475 ) -> np .ndarray :
476476 """
477477 Generates the adversarial examples. x needs to be composed of valid files by default which can support the
478- adversarial perturbation and so are malicious and can support the assigned L0 budget. They can obtained by
478+ adversarial perturbation and so are malicious and can support the assigned L0 budget. They can be obtained by
479479 using `pull_out_valid_samples` on the data.
480480
481481 This check on the input data can be over-ridden by toggling the flag verify_input_data
@@ -488,7 +488,7 @@ def generate( # pylint: disable=W0221
488488 After all the regions marked in perturb_sizes and perturb_starts have been assigned and automatically_append is
489489 set to true and there is remaining l0 perturbation, the extra perturbation is added at the end in an append style attack.
490490
491- :param x: A array with input data.
491+ :param x: An array with input data.
492492 :param y: (N, 1) binary labels to make sure the benign files are zero masked.
493493 :param sample_sizes: The size of the original file, before it was padded to the input size required by MalConv
494494 :param automatically_append: Whether to automatically append extra spare perturbation at the end of the file.
@@ -564,7 +564,7 @@ def generate( # pylint: disable=W0221
564564 for _ in trange (self .num_of_iterations , desc = "PE Adv. Malware" , disable = not self .verbose ):
565565 gradients = self .estimator .class_gradient (embeddings , label = 0 )
566566 # go from (bsize x 1 x features x embedding size) -> (bsize x features x embedding size) in a
567- # framework agnostic manner.
567+ # framework-agnostic manner.
568568 gradients = gradients [:, 0 , :, :]
569569 gradients = - 1 * gradients
570570 embeddings = self .update_embeddings (embeddings , gradients , mask )
@@ -590,7 +590,7 @@ def process_file(
590590 Go from raw file to numpy array.
591591
592592 :param filepath: Path to the file we convert to a numpy array
593- :param padding_char: The char to use to pad the input if it is shorter then maxlen
593+ :param padding_char: The char to use to pad the input if it is shorter than maxlen
594594 :param maxlen: Maximum size of the file processed by the model. Currently set to 1MB
595595 :return data: A numpy array of the PE file
596596 :return size_of_original_file: Size of the PE file
@@ -626,17 +626,22 @@ def get_peinfo(
626626
627627 cleaned_dump = {}
628628
629- binary = lief .parse (filepath ) # pylint: disable=I1101
629+ binary_load = lief .parse (filepath ) # pylint: disable=I1101
630+ if binary_load is not None :
631+ binary = binary_load
632+ else :
633+ raise ValueError ("Failed to load binary." )
634+
630635 for section in binary .sections :
631636 section_info = {}
632- slack = section .sizeof_raw_data - section .virtual_size
633- section_info ["PointerToRawData" ] = section .pointerto_raw_data
634- section_info ["VirtualAddress" ] = section .virtual_size
635- section_info ["SizeOfRawData" ] = section .sizeof_raw_data
637+ slack = section .sizeof_raw_data - section .virtual_size # type: ignore
638+ section_info ["PointerToRawData" ] = section .pointerto_raw_data # type: ignore
639+ section_info ["VirtualAddress" ] = section .virtual_size # type: ignore
640+ section_info ["SizeOfRawData" ] = section .sizeof_raw_data # type: ignore
636641 cleaned_dump [section .name ] = section_info
637642 if slack > 0 :
638643 size_of_slack .append (slack )
639- start_of_slack .append (section .pointerto_raw_data + section .virtual_size )
644+ start_of_slack .append (section .pointerto_raw_data + section .virtual_size ) # type: ignore
640645
641646 if save_to_json_path is not None :
642647 with open (save_to_json_path , "w" , encoding = "utf8" ) as outfile :
@@ -675,7 +680,7 @@ def insert_section(
675680 :param bytes_to_assign: (Optional) how many bytes we wish to specify when inserting a new section.
676681 If unspecified the whole l0 budget will be used on a single section.
677682 :param verbose: lief outputs a lot to the console, particularly if we are processing many files.
678- By default suppress printing of messages. Can be toggled on/off by True/False
683+ By default, suppress printing of messages. Can be toggled on/off by True/False
679684 :return manipulated_data: Executable with section inserted and turned into a numpy array of
680685 the appropriate size
681686 :return len(manipulated_file): Size of original file
@@ -690,7 +695,11 @@ def insert_section(
690695 if not verbose :
691696 lief .logging .disable ()
692697
693- binary = lief .PE .parse (datapoint )
698+ binary_parse = lief .PE .parse (datapoint )
699+ if binary_parse is not None :
700+ binary = binary_parse
701+ else :
702+ raise ValueError ("Failed to load binary." )
694703
695704 name_in_use = True
696705 while name_in_use :
@@ -705,8 +714,8 @@ def insert_section(
705714 new_section = lief .PE .Section (new_section_name )
706715
707716 if bytes_to_assign is None :
708- if self .l_0 < 1 : # l0 is a fraction of the filesize
709- # if its a filepath we need to get the file size
717+ if self .l_0 < 1 : # l0 is a fraction of the file size
718+ # if it's a filepath we need to get the file size
710719 if isinstance (datapoint , str ):
711720 with open (datapoint , "rb" ) as file :
712721 open_file = file .read ()
@@ -721,9 +730,9 @@ def insert_section(
721730 perturbation_size = int (sample_size * self .l_0 )
722731 else : # or l0 is interpreted as total perturbation size
723732 perturbation_size = int (self .l_0 )
724- new_section .content = [random .randint (0 , 255 ) for _ in range (perturbation_size )]
733+ new_section .content = [random .randint (0 , 255 ) for _ in range (perturbation_size )] # type: ignore
725734 else :
726- new_section .content = [random .randint (0 , 255 ) for _ in range (bytes_to_assign )]
735+ new_section .content = [random .randint (0 , 255 ) for _ in range (bytes_to_assign )] # type: ignore
727736
728737 # we add the new section at the end of the existing sections
729738 section_end_points = []
@@ -766,7 +775,7 @@ def insert_section(
766775
767776 manipulated_data = np .ones ((maxlen ,), dtype = np .uint16 ) * padding_char
768777
769- # Only process files which are less then the max file size supported
778+ # Only process files which are less than the max file size supported
770779 if len (manipulated_file ) < maxlen :
771780 manipulated_data [: len (manipulated_file )] = manipulated_file [:maxlen ]
772781
0 commit comments