@@ -15,25 +15,27 @@ class StringsScanner(AbstractScanner, ABC):
1515 """Implements known binary file scanning with ASCII strings representations"""
1616
@staticmethod
def get_enumerated_lines(data: bytes) -> List[Tuple[int, str]]:
    """Extract printable ASCII strings from binary data.

    A string is a maximal run of TAB (0x09), SPACE and visible ASCII
    bytes (0x20..0x7E). The byte offset of the first character of the
    run is used in place of a line number.

    Args:
        data: raw bytes to scan.

    Returns:
        A list of ``(offset, text)`` tuples in order of appearance. Runs
        shorter than ``MIN_DATA_LEN`` characters are not included.
    """
    enumerated_lines = []
    # -1 means "no run in progress"; 0 cannot be the sentinel because it
    # is a valid offset for a run starting at the first byte.
    offset = -1
    line_items = []
    for n, x in enumerate(data):
        if 0x09 == x or 0x20 <= x <= 0x7E:
            # TAB, SPACE and visible ASCII symbols
            if 0 > offset:
                # use start of string as line number
                offset = n
            line_items.append(chr(x))
            continue
        # A non-printable byte terminates the current run.
        if MIN_DATA_LEN <= len(line_items):
            # add valuable lines only
            enumerated_lines.append((offset, ''.join(line_items)))
        # NOTE(review): the reset is placed outside the length check so a
        # too-short run is discarded rather than merged with the next one;
        # the indentation was reconstructed - confirm against the repository.
        offset = -1
        line_items.clear()
    # Flush a run that reaches the end of the data.
    if MIN_DATA_LEN <= len(line_items):
        enumerated_lines.append((offset, ''.join(line_items)))
    return enumerated_lines
3739
3840 def data_scan (
3941 self , #
@@ -42,9 +44,9 @@ def data_scan(
4244 recursive_limit_size : int ) -> Optional [List [Candidate ]]:
4345 """Extracts data file from .ar (debian) archive and launches data_scan"""
4446
45- if strings := StringsScanner .get_strings (data_provider .data ):
46- string_data_provider = StringContentProvider (lines = [x [0 ] for x in strings ],
47- line_numbers = [x [1 ] for x in strings ],
47+ if strings := StringsScanner .get_enumerated_lines (data_provider .data ):
48+ string_data_provider = StringContentProvider (lines = [x [1 ] for x in strings ],
49+ line_numbers = [x [0 ] for x in strings ],
4850 file_path = data_provider .file_path ,
4951 file_type = data_provider .file_type ,
5052 info = f"{ data_provider .info } |STRINGS" )
0 commit comments