Skip to content

Commit 74d5f44

Browse files
committed
Modify simple mode
1 parent 6658d24 commit 74d5f44

File tree

2 files changed

+106
-33
lines changed

2 files changed

+106
-33
lines changed

src/fosslight_binary/binary_analysis.py

Lines changed: 105 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import logging
1212
import yaml
1313
import stat
14+
import re
1415
from fosslight_util.set_log import init_log
1516
import fosslight_util.constant as constant
1617
from fosslight_util.output_format import check_output_formats, write_output_file
@@ -19,13 +20,16 @@
1920
from ._jar_analysis import analyze_jar_file, merge_binary_list
2021
from fosslight_util.correct import correct_with_yaml
2122
from fosslight_util.oss_item import ScannerItem
23+
from fosslight_util.download import compression_extension
24+
from fosslight_util.write_txt import write_txt_file
2225
import hashlib
2326
import tlsh
2427
from io import open
2528

2629
PKG_NAME = "fosslight_binary"
2730
logger = logging.getLogger(constant.LOGGER_NAME)
2831

32+
_REMOVE_FILE_EXTENSION_SIMPLE = ['ttf', 'otf', 'png', 'gif', 'jpg', 'bmp', 'jpeg']
2933
_REMOVE_FILE_EXTENSION = ['qm', 'xlsx', 'pdf', 'pptx', 'jfif', 'docx', 'doc', 'whl',
3034
'xls', 'xlsm', 'ppt', 'mp4', 'pyc', 'plist', 'dat', 'json', 'js']
3135
_REMOVE_FILE_COMMAND_RESULT = [
@@ -48,6 +52,7 @@
4852
'Homepage', 'Copyright Text', 'Exclude',
4953
'Comment', 'Vulnerability Link', 'TLSH', 'SHA1']}
5054
HIDE_HEADER = {'TLSH', "SHA1"}
55+
SUPPORT_FORMAT = {'excel': '.xlsx', 'csv': '.csv', 'opossum': '.json', 'yaml': '.yaml', 'text': '.txt'}
5156

5257

5358
def get_checksum_and_tlsh(bin_with_path):
@@ -70,8 +75,9 @@ def get_checksum_and_tlsh(bin_with_path):
7075
return checksum_value, tlsh_value, error_msg
7176

7277

73-
def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
78+
def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[], simple_mode=False):
7479
global _root_path, logger, _start_time
80+
combined_paths_and_files = []
7581

7682
_json_ext = ".json"
7783
_start_time = datetime.now().strftime('%y%m%d_%H%M')
@@ -83,7 +89,10 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
8389
if not path_to_find_bin.endswith(os.path.sep):
8490
_root_path += os.path.sep
8591

86-
success, msg, output_path, output_files, output_extensions = check_output_formats(output_file_name, formats)
92+
success, msg, output_path, output_files, output_extensions = check_output_formats(output_file_name, formats, SUPPORT_FORMAT, True)
93+
print(f"output_path from init : {output_path}")
94+
print(f"output_files from init : {output_files}")
95+
print(f"output_extensions from init : {output_extensions}")
8796

8897
if success:
8998
if output_path == "":
@@ -93,14 +102,19 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
93102

94103
while len(output_files) < len(output_extensions):
95104
output_files.append(None)
96-
for i, output_extension in enumerate(output_extensions):
97-
if output_files[i] is None or output_files[i] == "":
98-
if output_extension == _json_ext:
99-
output_files[i] = f"fosslight_opossum_bin_{_start_time}"
100-
else:
101-
output_files[i] = f"fosslight_report_bin_{_start_time}"
102-
103-
combined_paths_and_files = [os.path.join(output_path, file) for file in output_files]
105+
if simple_mode:
106+
if output_files is None or output_files == "":
107+
combined_paths_and_files.append(os.path.join(output_path, f"binary_list_{_start_time}"))
108+
combined_paths_and_files.append(os.path.join(output_path, f"compressed_list_{_start_time}"))
109+
else:
110+
for i, output_extension in enumerate(output_extensions):
111+
if output_files[i] is None or output_files[i] == "":
112+
if output_extension == _json_ext:
113+
output_files[i] = f"fosslight_opossum_bin_{_start_time}"
114+
else:
115+
output_files[i] = f"fosslight_report_bin_{_start_time}"
116+
117+
combined_paths_and_files = [os.path.join(output_path, file) for file in output_files]
104118
else:
105119
logger.error(f"Format error - {msg}")
106120
sys.exit(1)
@@ -116,10 +130,11 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
116130
return _result_log, combined_paths_and_files, output_extensions
117131

118132

119-
def get_file_list(path_to_find, abs_path_to_exclude):
133+
def get_file_list(path_to_find, abs_path_to_exclude, simple_mode=False):
120134
bin_list = []
121135
file_cnt = 0
122136
found_jar = False
137+
compressed_list = []
123138

124139
for root, dirs, files in os.walk(path_to_find):
125140
if os.path.abspath(root) in abs_path_to_exclude:
@@ -148,6 +163,9 @@ def get_file_list(path_to_find, abs_path_to_exclude):
148163
bin_item.source_name_or_path = bin_with_path.replace(
149164
_root_path, '', 1)
150165

166+
if simple_mode and f".{extension}" in compression_extension:
167+
compressed_list.append(bin_item.bin_name_with_path)
168+
151169
if any(dir_name in dir_path for dir_name in _EXCLUDE_DIR):
152170
bin_item.exclude = True
153171
elif file.lower() in _EXCLUDE_FILE:
@@ -156,20 +174,44 @@ def get_file_list(path_to_find, abs_path_to_exclude):
156174
bin_item.exclude = True
157175
bin_list.append(bin_item)
158176
file_cnt += 1
159-
return file_cnt, bin_list, found_jar
177+
return file_cnt, bin_list, found_jar, compressed_list
178+
179+
180+
def exclude_bin_for_simple_mode(binary_list):
    """Yield the paths of binaries that belong in the simple-mode listing.

    An item is skipped when its lower-cased path looks like a source-code
    JAR or when its ``exclude`` attribute is truthy.

    Args:
        binary_list: Iterable of items exposing ``bin_name_with_path`` (str)
            and ``exclude`` attributes.

    Yields:
        The ``bin_name_with_path`` of every item that is kept.
    """
    # Source archives are conventionally named "<artifact>-sources.jar"; the
    # optional "s" covers that plural form as well as a plain "source.jar".
    source_jar_pattern = re.compile(r"sources?\.jar")
    for bin_item in binary_list:
        is_source_jar = source_jar_pattern.search(bin_item.bin_name_with_path.lower())
        if is_source_jar or bin_item.exclude:
            continue
        yield bin_item.bin_name_with_path
184+
185+
186+
def log_result_msg(results: list, scan_item: ScannerItem):
    """Log the outcome of every attempt to write a result file.

    Args:
        results: Iterable of ``(success, message, result_file)`` triples.
        scan_item: Object whose ``get_cover_comment()`` rows are logged after
            each successful write.
    """
    for is_written, message, output_file in results:
        if not is_written:
            logger.error(f"Fail to generate result file.:{message}")
            continue

        # A successful write without a file name only carries a message.
        if output_file:
            logger.info(f"Output file :{output_file}")
        else:
            logger.warning(f"{message}")

        for comment_row in scan_item.get_cover_comment():
            logger.info(comment_row)
197+
198+
199+
# def get_result_file(result_reports:list, output_extensions:list, results:list, compressed_list:list, bin_list:list):
200+
# for combined_path_and_file, output_extension in zip(result_reports, output_extensions):
201+
# results.append(write_output_file(combined_path_and_file, output_extension, scan_item, BIN_EXT_HEADER, HIDE_HEADER))
202+
# return results
160203

161204

162205
def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=False,
163206
correct_mode=True, correct_filepath="", path_to_exclude=[]):
164207

165208
_result_log, result_reports, output_extensions = init(
166-
path_to_find_bin, output_dir, formats, path_to_exclude)
209+
path_to_find_bin, output_dir, formats, path_to_exclude, simple_mode)
167210

168211
total_bin_cnt = 0
169212
total_file_cnt = 0
170213
db_loaded_cnt = 0
171214
success_to_write = False
172-
writing_msg = ""
173215
results = []
174216
bin_list = []
175217
base_dir_name = os.path.basename(path_to_find_bin)
@@ -183,15 +225,22 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
183225
if not correct_filepath:
184226
correct_filepath = path_to_find_bin
185227
try:
186-
total_file_cnt, file_list, found_jar = get_file_list(path_to_find_bin, abs_path_to_exclude)
187-
return_list = list(return_bin_only(file_list))
228+
total_file_cnt, file_list, found_jar, compressed_list = get_file_list(path_to_find_bin, abs_path_to_exclude, simple_mode)
229+
return_list = list(return_bin_only(file_list, True, simple_mode))
188230
except Exception as ex:
189231
error_occured(error_msg=f"Failed to check whether it is binary or not : {ex}",
190232
result_log=_result_log,
191233
exit=True)
192234
total_bin_cnt = len(return_list)
193235
if simple_mode:
194-
bin_list = [bin.bin_name_with_path for bin in return_list]
236+
bin_list = list(exclude_bin_for_simple_mode(return_list))
237+
238+
# results = get_result_file(result_reports, output_extensions, results, compressed_list, bin_list)
239+
240+
print(f"result_reports: {result_reports}")
241+
print(f"output_extensions: {output_extensions}")
242+
243+
print_simple_mode(result_reports, output_extensions, compressed_list, bin_list)
195244
else:
196245
scan_item = ScannerItem(PKG_NAME, _start_time)
197246
scan_item.set_cover_pathinfo(path_to_find_bin, path_to_exclude)
@@ -226,17 +275,8 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
226275

227276
except Exception as ex:
228277
error_occured(error_msg=str(ex), exit=False)
229-
230-
for success_to_write, writing_msg, result_file in results:
231-
if success_to_write:
232-
if result_file:
233-
logger.info(f"Output file :{result_file}")
234-
else:
235-
logger.warning(f"{writing_msg}")
236-
for row in scan_item.get_cover_comment():
237-
logger.info(row)
238-
else:
239-
logger.error(f"Fail to generate result file.:{writing_msg}")
278+
279+
log_result_msg(results, scan_item)
240280

241281
try:
242282
print_result_log(success=True, result_log=_result_log,
@@ -249,10 +289,10 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
249289
return success_to_write, scan_item
250290

251291

252-
def return_bin_only(file_list, need_checksum_tlsh=True):
292+
def return_bin_only(file_list, need_checksum_tlsh=True, simple_mode=False):
253293
for file_item in file_list:
254294
try:
255-
if check_binary(file_item.bin_name_with_path):
295+
if check_binary(file_item.bin_name_with_path, simple_mode):
256296
if need_checksum_tlsh:
257297
file_item.checksum, file_item.tlsh, error_msg = get_checksum_and_tlsh(file_item.bin_name_with_path)
258298
if error_msg:
@@ -264,11 +304,17 @@ def return_bin_only(file_list, need_checksum_tlsh=True):
264304
yield file_item
265305

266306

267-
def check_binary(file_with_path):
307+
def check_binary(file_with_path, simple_mode=False):
268308
is_bin_confirmed = False
269309
file = os.path.basename(file_with_path)
270310
extension = os.path.splitext(file)[1][1:]
271-
if not os.path.islink(file_with_path) and extension.lower() not in _REMOVE_FILE_EXTENSION:
311+
312+
if simple_mode:
313+
remove_file_ext_list = _REMOVE_FILE_EXTENSION + _REMOVE_FILE_EXTENSION_SIMPLE + list(compression_extension)
314+
else:
315+
remove_file_ext_list = _REMOVE_FILE_EXTENSION
316+
317+
if not os.path.islink(file_with_path) and extension.lower() not in remove_file_ext_list:
272318
if stat.S_ISFIFO(os.stat(file_with_path).st_mode):
273319
return False
274320
file_command_result = ""
@@ -324,3 +370,30 @@ def print_result_log(success=True, result_log={}, file_cnt="", bin_file_cnt="",
324370
logger.info(_str_final_result_log)
325371
except Exception as ex:
326372
logger.warning(f"Error to print final log: {ex}")
373+
374+
375+
def convert_list_to_str(input_list):
    """Return the items of *input_list* as text, one ``str(item)`` per line."""
    lines = [str(entry) for entry in input_list]
    return '\n'.join(lines)
378+
379+
380+
def print_simple_mode(result_reports, output_extensions, compressed_list, bin_list):
    """Write the simple-mode text reports (compressed files and binaries).

    Args:
        result_reports: Output paths without extension. A path containing
            ``compressed_list_`` receives the compressed-file list; any other
            path receives the binary list (the last matching path wins).
        output_extensions: Extension for the output files, either a single
            string or a sequence whose first usable entry is taken. The
            leading dot is optional; ".txt" is used when nothing is given.
        compressed_list: Paths of compressed files to record.
        bin_list: Paths of binaries to record.
    """
    extension = _resolve_simple_mode_extension(output_extensions)

    compressed_list_txt = ""
    simple_mode_bin_list_txt = ""
    for item in result_reports:
        if 'compressed_list_' in item:
            compressed_list_txt = f"{item}{extension}"
        else:
            simple_mode_bin_list_txt = f"{item}{extension}"

    _write_simple_mode_list(compressed_list_txt, compressed_list, "compressed")
    _write_simple_mode_list(simple_mode_bin_list_txt, bin_list, "binary")


def _resolve_simple_mode_extension(output_extensions, default=".txt"):
    """Return one normalized ``.ext`` string from a string or a sequence of them."""
    if isinstance(output_extensions, str):
        candidates = [output_extensions]
    else:
        candidates = list(output_extensions or [])
    for candidate in candidates:
        # Strip any leading dots so "txt" and ".txt" both become ".txt".
        stripped = str(candidate).lstrip(".") if candidate else ""
        if stripped:
            return f".{stripped}"
    return default


def _write_simple_mode_list(output_file, items, label):
    """Write *items* one per line to *output_file*, logging any failure."""
    if not items:
        return
    if not output_file:
        # No report path was prepared for this list; report it instead of
        # handing an empty file name to the writer.
        logger.error(f"Error to write {label} list file for simple mode : output file path is not set")
        return
    success, error = write_txt_file(output_file, convert_list_to_str(items))
    if not success:
        logger.error(f"Error to write {label} list file for simple mode : {error}")

src/fosslight_binary/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def main():
4949
print_package_version(_PKG_NAME, "FOSSLight Binary Scanner Version:")
5050
sys.exit(0)
5151

52-
if args.simple:
52+
if args.simple: # -s option
5353
simple_mode = True
5454

5555
if args.path: # -p option

0 commit comments

Comments
 (0)