1111import logging
1212import yaml
1313import stat
14+ import re
1415from fosslight_util .set_log import init_log
1516import fosslight_util .constant as constant
1617from fosslight_util .output_format import check_output_formats , write_output_file
1920from ._jar_analysis import analyze_jar_file , merge_binary_list
2021from fosslight_util .correct import correct_with_yaml
2122from fosslight_util .oss_item import ScannerItem
23+ from fosslight_util .download import compression_extension
24+ from fosslight_util .write_txt import write_txt_file
2225import hashlib
2326import tlsh
2427from io import open
2528
2629PKG_NAME = "fosslight_binary"
2730logger = logging .getLogger (constant .LOGGER_NAME )
2831
32+ _REMOVE_FILE_EXTENSION_SIMPLE = ['ttf' , 'otf' , 'png' , 'gif' , 'jpg' , 'bmp' , 'jpeg' ]
2933_REMOVE_FILE_EXTENSION = ['qm' , 'xlsx' , 'pdf' , 'pptx' , 'jfif' , 'docx' , 'doc' , 'whl' ,
3034 'xls' , 'xlsm' , 'ppt' , 'mp4' , 'pyc' , 'plist' , 'dat' , 'json' , 'js' ]
3135_REMOVE_FILE_COMMAND_RESULT = [
4852 'Homepage' , 'Copyright Text' , 'Exclude' ,
4953 'Comment' , 'Vulnerability Link' , 'TLSH' , 'SHA1' ]}
5054HIDE_HEADER = {'TLSH' , "SHA1" }
55+ SUPPORT_FORMAT = {'excel' : '.xlsx' , 'csv' : '.csv' , 'opossum' : '.json' , 'yaml' : '.yaml' , 'text' : '.txt' }
5156
5257
5358def get_checksum_and_tlsh (bin_with_path ):
@@ -70,8 +75,9 @@ def get_checksum_and_tlsh(bin_with_path):
7075 return checksum_value , tlsh_value , error_msg
7176
7277
73- def init (path_to_find_bin , output_file_name , formats , path_to_exclude = []):
78+ def init (path_to_find_bin , output_file_name , formats , path_to_exclude = [], simple_mode = False ):
7479 global _root_path , logger , _start_time
80+ combined_paths_and_files = []
7581
7682 _json_ext = ".json"
7783 _start_time = datetime .now ().strftime ('%y%m%d_%H%M' )
@@ -83,7 +89,10 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
8389 if not path_to_find_bin .endswith (os .path .sep ):
8490 _root_path += os .path .sep
8591
86- success , msg , output_path , output_files , output_extensions = check_output_formats (output_file_name , formats )
92+ success , msg , output_path , output_files , output_extensions = check_output_formats (output_file_name , formats , SUPPORT_FORMAT , True )
93+ print (f"output_path from init : { output_path } " )
94+ print (f"output_files from init : { output_files } " )
95+ print (f"output_extensions from init : { output_extensions } " )
8796
8897 if success :
8998 if output_path == "" :
@@ -93,14 +102,19 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
93102
94103 while len (output_files ) < len (output_extensions ):
95104 output_files .append (None )
96- for i , output_extension in enumerate (output_extensions ):
97- if output_files [i ] is None or output_files [i ] == "" :
98- if output_extension == _json_ext :
99- output_files [i ] = f"fosslight_opossum_bin_{ _start_time } "
100- else :
101- output_files [i ] = f"fosslight_report_bin_{ _start_time } "
102-
103- combined_paths_and_files = [os .path .join (output_path , file ) for file in output_files ]
105+ if simple_mode :
106+ if output_files is None or output_files == "" :
107+ combined_paths_and_files .append (os .path .join (output_path , f"binary_list_{ _start_time } " ))
108+ combined_paths_and_files .append (os .path .join (output_path , f"compressed_list_{ _start_time } " ))
109+ else :
110+ for i , output_extension in enumerate (output_extensions ):
111+ if output_files [i ] is None or output_files [i ] == "" :
112+ if output_extension == _json_ext :
113+ output_files [i ] = f"fosslight_opossum_bin_{ _start_time } "
114+ else :
115+ output_files [i ] = f"fosslight_report_bin_{ _start_time } "
116+
117+ combined_paths_and_files = [os .path .join (output_path , file ) for file in output_files ]
104118 else :
105119 logger .error (f"Format error - { msg } " )
106120 sys .exit (1 )
@@ -116,10 +130,11 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
116130 return _result_log , combined_paths_and_files , output_extensions
117131
118132
119- def get_file_list (path_to_find , abs_path_to_exclude ):
133+ def get_file_list (path_to_find , abs_path_to_exclude , simple_mode = False ):
120134 bin_list = []
121135 file_cnt = 0
122136 found_jar = False
137+ compressed_list = []
123138
124139 for root , dirs , files in os .walk (path_to_find ):
125140 if os .path .abspath (root ) in abs_path_to_exclude :
@@ -148,6 +163,9 @@ def get_file_list(path_to_find, abs_path_to_exclude):
148163 bin_item .source_name_or_path = bin_with_path .replace (
149164 _root_path , '' , 1 )
150165
166+ if simple_mode and f".{ extension } " in compression_extension :
167+ compressed_list .append (bin_item .bin_name_with_path )
168+
151169 if any (dir_name in dir_path for dir_name in _EXCLUDE_DIR ):
152170 bin_item .exclude = True
153171 elif file .lower () in _EXCLUDE_FILE :
@@ -156,20 +174,44 @@ def get_file_list(path_to_find, abs_path_to_exclude):
156174 bin_item .exclude = True
157175 bin_list .append (bin_item )
158176 file_cnt += 1
159- return file_cnt , bin_list , found_jar
177+ return file_cnt , bin_list , found_jar , compressed_list
178+
179+
def exclude_bin_for_simple_mode(binary_list):
    """Yield the paths of the binaries that are reported in simple mode.

    An item is left out when its ``exclude`` flag is set, or when its path
    (compared case-insensitively) names a Java source archive.

    Args:
        binary_list: Iterable of items exposing ``bin_name_with_path`` (str)
            and ``exclude`` (truthy when the item must be skipped).

    Yields:
        str: ``bin_name_with_path`` of every kept item, in input order.
    """
    # Maven publishes source archives as "<artifact>-sources.jar". The plain
    # "source.jar" spelling is still matched, so everything the previous
    # pattern excluded stays excluded.
    source_jar = re.compile(r"sources?\.jar")

    for bin_item in binary_list:
        if bin_item.exclude:
            continue
        if source_jar.search(bin_item.bin_name_with_path.lower()):
            continue
        yield bin_item.bin_name_with_path
184+
185+
def log_result_msg(results: list, scan_item: ScannerItem):
    """Log the outcome of every attempt to write a result file.

    Args:
        results: Iterable of ``(success_to_write, writing_msg, result_file)``
            tuples, one per output that was attempted.
        scan_item: Item whose ``get_cover_comment()`` rows are logged after a
            successful write.
    """
    for written, message, report_path in results:
        if not written:
            logger.error(f"Fail to generate result file.:{message}")
            continue

        if report_path:
            logger.info(f"Output file :{report_path}")
        else:
            # The write succeeded but no file path came back, so only the
            # writer's message can be shown.
            logger.warning(f"{message}")

        # NOTE(review): cover comments are logged for every successful write,
        # whether or not a file path was returned - confirm this nesting.
        for cover_row in scan_item.get_cover_comment():
            logger.info(cover_row)
197+
198+
199+ # def get_result_file(result_reports:list, output_extensions:list, results:list, compressed_list:list, bin_list:list):
200+ # for combined_path_and_file, output_extension in zip(result_reports, output_extensions):
201+ # results.append(write_output_file(combined_path_and_file, output_extension, scan_item, BIN_EXT_HEADER, HIDE_HEADER))
202+ # return results
160203
161204
162205def find_binaries (path_to_find_bin , output_dir , formats , dburl = "" , simple_mode = False ,
163206 correct_mode = True , correct_filepath = "" , path_to_exclude = []):
164207
165208 _result_log , result_reports , output_extensions = init (
166- path_to_find_bin , output_dir , formats , path_to_exclude )
209+ path_to_find_bin , output_dir , formats , path_to_exclude , simple_mode )
167210
168211 total_bin_cnt = 0
169212 total_file_cnt = 0
170213 db_loaded_cnt = 0
171214 success_to_write = False
172- writing_msg = ""
173215 results = []
174216 bin_list = []
175217 base_dir_name = os .path .basename (path_to_find_bin )
@@ -183,15 +225,22 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
183225 if not correct_filepath :
184226 correct_filepath = path_to_find_bin
185227 try :
186- total_file_cnt , file_list , found_jar = get_file_list (path_to_find_bin , abs_path_to_exclude )
187- return_list = list (return_bin_only (file_list ))
228+ total_file_cnt , file_list , found_jar , compressed_list = get_file_list (path_to_find_bin , abs_path_to_exclude , simple_mode )
229+ return_list = list (return_bin_only (file_list , True , simple_mode ))
188230 except Exception as ex :
189231 error_occured (error_msg = f"Failed to check whether it is binary or not : { ex } " ,
190232 result_log = _result_log ,
191233 exit = True )
192234 total_bin_cnt = len (return_list )
193235 if simple_mode :
194- bin_list = [bin .bin_name_with_path for bin in return_list ]
236+ bin_list = list (exclude_bin_for_simple_mode (return_list ))
237+
238+ # results = get_result_file(result_reports, output_extensions, results, compressed_list, bin_list)
239+
240+ print (f"result_reports: { result_reports } " )
241+ print (f"output_extensions: { output_extensions } " )
242+
243+ print_simple_mode (result_reports , output_extensions , compressed_list , bin_list )
195244 else :
196245 scan_item = ScannerItem (PKG_NAME , _start_time )
197246 scan_item .set_cover_pathinfo (path_to_find_bin , path_to_exclude )
@@ -226,17 +275,8 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
226275
227276 except Exception as ex :
228277 error_occured (error_msg = str (ex ), exit = False )
229-
230- for success_to_write , writing_msg , result_file in results :
231- if success_to_write :
232- if result_file :
233- logger .info (f"Output file :{ result_file } " )
234- else :
235- logger .warning (f"{ writing_msg } " )
236- for row in scan_item .get_cover_comment ():
237- logger .info (row )
238- else :
239- logger .error (f"Fail to generate result file.:{ writing_msg } " )
278+
279+ log_result_msg (results , scan_item )
240280
241281 try :
242282 print_result_log (success = True , result_log = _result_log ,
@@ -249,10 +289,10 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
249289 return success_to_write , scan_item
250290
251291
252- def return_bin_only (file_list , need_checksum_tlsh = True ):
292+ def return_bin_only (file_list , need_checksum_tlsh = True , simple_mode = False ):
253293 for file_item in file_list :
254294 try :
255- if check_binary (file_item .bin_name_with_path ):
295+ if check_binary (file_item .bin_name_with_path , simple_mode ):
256296 if need_checksum_tlsh :
257297 file_item .checksum , file_item .tlsh , error_msg = get_checksum_and_tlsh (file_item .bin_name_with_path )
258298 if error_msg :
@@ -264,11 +304,17 @@ def return_bin_only(file_list, need_checksum_tlsh=True):
264304 yield file_item
265305
266306
267- def check_binary (file_with_path ):
307+ def check_binary (file_with_path , simple_mode = False ):
268308 is_bin_confirmed = False
269309 file = os .path .basename (file_with_path )
270310 extension = os .path .splitext (file )[1 ][1 :]
271- if not os .path .islink (file_with_path ) and extension .lower () not in _REMOVE_FILE_EXTENSION :
311+
312+ if simple_mode :
313+ remove_file_ext_list = _REMOVE_FILE_EXTENSION + _REMOVE_FILE_EXTENSION_SIMPLE + list (compression_extension )
314+ else :
315+ remove_file_ext_list = _REMOVE_FILE_EXTENSION
316+
317+ if not os .path .islink (file_with_path ) and extension .lower () not in remove_file_ext_list :
272318 if stat .S_ISFIFO (os .stat (file_with_path ).st_mode ):
273319 return False
274320 file_command_result = ""
@@ -324,3 +370,30 @@ def print_result_log(success=True, result_log={}, file_cnt="", bin_file_cnt="",
324370 logger .info (_str_final_result_log )
325371 except Exception as ex :
326372 logger .warning (f"Error to print final log: { ex } " )
373+
374+
def convert_list_to_str(input_list):
    """Return the items of *input_list* as text, one item per line.

    Every element is converted with ``str`` and the pieces are joined with a
    newline. No trailing newline is added; an empty input gives ``""``.
    """
    return '\n'.join(str(entry) for entry in input_list)
378+
379+
def print_simple_mode(result_reports, output_extensions, compressed_list, bin_list):
    """Write the simple-mode result lists to plain-text files.

    Args:
        result_reports: Output paths without a file extension. The entry whose
            path contains ``compressed_list_`` receives the compressed-file
            list; any other entry receives the binary list (if several such
            entries exist, the last one is used).
        output_extensions: Accepted for call compatibility only. It is not
            used to build file names: it may hold several extensions, and
            both lists are written as plain text by ``write_txt_file``.
        compressed_list: Paths of compressed files; nothing is written when
            it is empty.
        bin_list: Paths of binaries; nothing is written when it is empty.
    """
    txt_ext = ".txt"
    compressed_list_txt = ""
    simple_mode_bin_list_txt = ""

    for report_path in result_reports:
        if not report_path:
            continue
        # Do not append the extension twice when the path already carries it.
        target = report_path if report_path.endswith(txt_ext) else f"{report_path}{txt_ext}"
        if 'compressed_list_' in report_path:
            compressed_list_txt = target
        else:
            simple_mode_bin_list_txt = target

    outputs = (
        ("compressed list", compressed_list_txt, compressed_list),
        ("binary list", simple_mode_bin_list_txt, bin_list),
    )
    for label, target, entries in outputs:
        if not entries:
            continue
        if not target:
            # No matching report path was supplied; writing to "" would only
            # fail later with a less helpful message.
            logger.error(f"Error to write {label} file for simple mode : output path is not set")
            continue
        success, error = write_txt_file(target, convert_list_to_str(entries))
        if not success:
            logger.error(f"Error to write {label} file for simple mode : {error}")
0 commit comments