1111import logging
1212import yaml
1313import stat
14+ import re
1415from fosslight_util .set_log import init_log
1516import fosslight_util .constant as constant
1617from fosslight_util .output_format import check_output_formats , write_output_file
1920from ._jar_analysis import analyze_jar_file , merge_binary_list
2021from fosslight_util .correct import correct_with_yaml
2122from fosslight_util .oss_item import ScannerItem
23+ from fosslight_util .download import compression_extension
24+ from fosslight_util .write_txt import write_txt_file
2225import hashlib
2326import tlsh
2427from io import open
2528
# Package name; passed to ScannerItem when a scan result is built.
PKG_NAME = "fosslight_binary"
logger = logging.getLogger(constant.LOGGER_NAME)

# Extensions that check_binary() additionally skips when simple mode is on.
_REMOVE_FILE_EXTENSION_SIMPLE = [
    'ttf', 'otf', 'png', 'gif', 'jpg', 'bmp', 'jpeg',
]
# Extensions that check_binary() skips in every mode.
# Both collections stay plain lists because check_binary() concatenates them with "+".
_REMOVE_FILE_EXTENSION = [
    'qm', 'xlsx', 'pdf', 'pptx', 'jfif', 'docx', 'doc', 'whl',
    'xls', 'xlsm', 'ppt', 'mp4', 'pyc', 'plist', 'dat', 'json', 'js',
]
3135_REMOVE_FILE_COMMAND_RESULT = [
@@ -70,8 +74,9 @@ def get_checksum_and_tlsh(bin_with_path):
7074 return checksum_value , tlsh_value , error_msg
7175
7276
73- def init (path_to_find_bin , output_file_name , formats , path_to_exclude = []):
77+ def init (path_to_find_bin , output_file_name , formats , path_to_exclude = [], simple_mode = False ):
7478 global _root_path , logger , _start_time
79+ combined_paths_and_files = []
7580
7681 _json_ext = ".json"
7782 _start_time = datetime .now ().strftime ('%y%m%d_%H%M' )
@@ -91,16 +96,26 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
9196 else :
9297 output_path = os .path .abspath (output_path )
9398
94- while len (output_files ) < len (output_extensions ):
95- output_files .append (None )
96- for i , output_extension in enumerate (output_extensions ):
97- if output_files [i ] is None or output_files [i ] == "" :
98- if output_extension == _json_ext :
99- output_files [i ] = f"fosslight_opossum_bin_{ _start_time } "
100- else :
101- output_files [i ] = f"fosslight_report_bin_{ _start_time } "
99+ if simple_mode :
100+ if len (output_files ) == 0 :
101+ combined_paths_and_files .append (os .path .join (output_path , f"binary_list_{ _start_time } " ))
102+ combined_paths_and_files .append (os .path .join (output_path , f"compressed_list_{ _start_time } " ))
103+ else :
104+ for file in output_files :
105+ combined_paths_and_files .append (os .path .join (output_path , str (file )))
106+ combined_paths_and_files .append (os .path .join (output_path , f"{ file } _compressed_list" ))
107+ else :
108+ while len (output_files ) < len (output_extensions ):
109+ output_files .append (None )
102110
103- combined_paths_and_files = [os .path .join (output_path , file ) for file in output_files ]
111+ for i , output_extension in enumerate (output_extensions ):
112+ if output_files [i ] is None or output_files [i ] == "" :
113+ if output_extension == _json_ext :
114+ output_files [i ] = f"fosslight_opossum_bin_{ _start_time } "
115+ else :
116+ output_files [i ] = f"fosslight_report_bin_{ _start_time } "
117+
118+ combined_paths_and_files = [os .path .join (output_path , file ) for file in output_files ]
104119 else :
105120 logger .error (f"Format error - { msg } " )
106121 sys .exit (1 )
@@ -116,10 +131,11 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
116131 return _result_log , combined_paths_and_files , output_extensions
117132
118133
119- def get_file_list (path_to_find , abs_path_to_exclude ):
134+ def get_file_list (path_to_find , abs_path_to_exclude , simple_mode = False ):
120135 bin_list = []
121136 file_cnt = 0
122137 found_jar = False
138+ compressed_list = []
123139
124140 for root , dirs , files in os .walk (path_to_find ):
125141 if os .path .abspath (root ) in abs_path_to_exclude :
@@ -148,6 +164,9 @@ def get_file_list(path_to_find, abs_path_to_exclude):
148164 bin_item .source_name_or_path = bin_with_path .replace (
149165 _root_path , '' , 1 )
150166
167+ if simple_mode and f".{ extension } " in compression_extension :
168+ compressed_list .append (bin_item .bin_name_with_path )
169+
151170 if any (dir_name in dir_path for dir_name in _EXCLUDE_DIR ):
152171 bin_item .exclude = True
153172 elif file .lower () in _EXCLUDE_FILE :
@@ -156,20 +175,38 @@ def get_file_list(path_to_find, abs_path_to_exclude):
156175 bin_item .exclude = True
157176 bin_list .append (bin_item )
158177 file_cnt += 1
159- return file_cnt , bin_list , found_jar
178+ return file_cnt , bin_list , found_jar , compressed_list
179+
180+
def exclude_bin_for_simple_mode(binary_list):
    """Yield the path of every binary that simple mode should report.

    An item is skipped when its ``exclude`` flag is set, or when its
    lower-cased path contains ``source.jar`` or ``sources.jar``.

    :param binary_list: iterable of items exposing ``bin_name_with_path``
        and ``exclude``.
    :return: generator of ``bin_name_with_path`` values for the kept items.
    """
    # The optional "s" is deliberate: the previous pattern (source\.jar) could
    # not match the common "<artifact>-sources.jar" naming, so those source
    # archives were reported as binaries. Everything the old pattern matched
    # is still matched.
    is_source_jar = re.compile(r"sources?\.jar").search

    for bin_item in binary_list:
        if bin_item.exclude:
            continue
        if is_source_jar(bin_item.bin_name_with_path.lower()):
            continue
        yield bin_item.bin_name_with_path
185+
186+
def log_result_msg(results: list, scan_item: ScannerItem):
    """Log the outcome of each attempted result-file write.

    :param results: iterable of ``(success, message, result_file)`` triples.
    :param scan_item: object whose ``get_cover_comment()`` rows are logged
        after every successful entry.
    """
    for written, writing_msg, result_file in results:
        # Failure: report it and move on to the next entry.
        if not written:
            logger.error(f"Fail to generate result file.:{ writing_msg } ")
            continue

        # Success without a file path only carries a message worth a warning.
        if not result_file:
            logger.warning(f"{ writing_msg } ")
        else:
            logger.info(f"Output file :{ result_file } ")

        for comment_row in scan_item.get_cover_comment():
            logger.info(comment_row)
160198
161199
162200def find_binaries (path_to_find_bin , output_dir , formats , dburl = "" , simple_mode = False ,
163201 correct_mode = True , correct_filepath = "" , path_to_exclude = []):
164202
165203 _result_log , result_reports , output_extensions = init (
166- path_to_find_bin , output_dir , formats , path_to_exclude )
204+ path_to_find_bin , output_dir , formats , path_to_exclude , simple_mode )
167205
168206 total_bin_cnt = 0
169207 total_file_cnt = 0
170208 db_loaded_cnt = 0
171209 success_to_write = False
172- writing_msg = ""
173210 results = []
174211 bin_list = []
175212 base_dir_name = os .path .basename (path_to_find_bin )
@@ -183,15 +220,16 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
183220 if not correct_filepath :
184221 correct_filepath = path_to_find_bin
185222 try :
186- total_file_cnt , file_list , found_jar = get_file_list (path_to_find_bin , abs_path_to_exclude )
187- return_list = list (return_bin_only (file_list ))
223+ total_file_cnt , file_list , found_jar , compressed_list = get_file_list (path_to_find_bin , abs_path_to_exclude , simple_mode )
224+ return_list = list (return_bin_only (file_list , True , simple_mode ))
188225 except Exception as ex :
189226 error_occured (error_msg = f"Failed to check whether it is binary or not : { ex } " ,
190227 result_log = _result_log ,
191228 exit = True )
192229 total_bin_cnt = len (return_list )
193230 if simple_mode :
194- bin_list = [bin .bin_name_with_path for bin in return_list ]
231+ bin_list = list (exclude_bin_for_simple_mode (return_list ))
232+ print_simple_mode (result_reports , compressed_list , bin_list )
195233 else :
196234 scan_item = ScannerItem (PKG_NAME , _start_time )
197235 scan_item .set_cover_pathinfo (path_to_find_bin , path_to_exclude )
@@ -226,17 +264,8 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
226264
227265 except Exception as ex :
228266 error_occured (error_msg = str (ex ), exit = False )
229-
230- for success_to_write , writing_msg , result_file in results :
231- if success_to_write :
232- if result_file :
233- logger .info (f"Output file :{ result_file } " )
234- else :
235- logger .warning (f"{ writing_msg } " )
236- for row in scan_item .get_cover_comment ():
237- logger .info (row )
238- else :
239- logger .error (f"Fail to generate result file.:{ writing_msg } " )
267+
268+ log_result_msg (results , scan_item )
240269
241270 try :
242271 print_result_log (success = True , result_log = _result_log ,
@@ -249,10 +278,10 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
249278 return success_to_write , scan_item
250279
251280
252- def return_bin_only (file_list , need_checksum_tlsh = True ):
281+ def return_bin_only (file_list , need_checksum_tlsh = True , simple_mode = False ):
253282 for file_item in file_list :
254283 try :
255- if check_binary (file_item .bin_name_with_path ):
284+ if check_binary (file_item .bin_name_with_path , simple_mode ):
256285 if need_checksum_tlsh :
257286 file_item .checksum , file_item .tlsh , error_msg = get_checksum_and_tlsh (file_item .bin_name_with_path )
258287 if error_msg :
@@ -264,11 +293,20 @@ def return_bin_only(file_list, need_checksum_tlsh=True):
264293 yield file_item
265294
266295
267- def check_binary (file_with_path ):
296+ def check_binary (file_with_path , simple_mode = False ):
268297 is_bin_confirmed = False
269298 file = os .path .basename (file_with_path )
270299 extension = os .path .splitext (file )[1 ][1 :]
271- if not os .path .islink (file_with_path ) and extension .lower () not in _REMOVE_FILE_EXTENSION :
300+ compres_ext_wo_dot = []
301+
302+ if simple_mode :
303+ for ext in compression_extension :
304+ compres_ext_wo_dot .append (ext .lstrip ('.' ))
305+ remove_file_ext_list = _REMOVE_FILE_EXTENSION + _REMOVE_FILE_EXTENSION_SIMPLE + compres_ext_wo_dot
306+ else :
307+ remove_file_ext_list = _REMOVE_FILE_EXTENSION
308+
309+ if not os .path .islink (file_with_path ) and extension .lower () not in remove_file_ext_list :
272310 if stat .S_ISFIFO (os .stat (file_with_path ).st_mode ):
273311 return False
274312 file_command_result = ""
@@ -324,3 +362,30 @@ def print_result_log(success=True, result_log={}, file_cnt="", bin_file_cnt="",
324362 logger .info (_str_final_result_log )
325363 except Exception as ex :
326364 logger .warning (f"Error to print final log: { ex } " )
365+
366+
def convert_list_to_str(input_list):
    """Render *input_list* as text with one entry per line.

    Every element is converted with ``str``. Entries are separated by a bare
    newline, so no entry picks up leading whitespace. An empty input yields
    an empty string.

    :param input_list: iterable of values to write out.
    :return: the joined text.
    """
    return '\n'.join(map(str, input_list))
370+
371+
def print_simple_mode(result_reports, compressed_list, bin_list):
    """Write the simple-mode text reports.

    ``result_reports`` holds extension-less output paths. A path is taken as
    the compressed-list target when its base name starts with
    ``compressed_list_`` (the default name) or ends with ``_compressed_list``
    (the name derived from a user-supplied output file); any other path is
    the binary-list target. If several paths fall into the same category the
    last one wins. A list is only written when it is non-empty and a target
    for it was found.

    :param result_reports: iterable of output paths without the ``.txt`` suffix.
    :param compressed_list: paths of compressed files to record.
    :param bin_list: paths of binaries to record.
    """
    compressed_list_txt = ""
    bin_list_txt = ""

    for report in result_reports:
        # Classify on the file name only, so a directory that happens to be
        # called "compressed_list_..." cannot redirect the binary list.
        report_name = os.path.basename(str(report))
        if report_name.startswith("compressed_list_") or report_name.endswith("_compressed_list"):
            compressed_list_txt = f"{report}.txt"
        else:
            bin_list_txt = f"{report}.txt"

    if compressed_list and compressed_list_txt:
        success, error = write_txt_file(compressed_list_txt, convert_list_to_str(compressed_list))
        if not success:
            logger.error(f"Error to write compressed list file for simple mode : { error } ")
    if bin_list and bin_list_txt:
        success, error = write_txt_file(bin_list_txt, convert_list_to_str(bin_list))
        if not success:
            logger.error(f"Error to write binary list file for simple mode : { error } ")
0 commit comments