Skip to content

Commit 17efdc9

Browse files
committed
Apply simple mode
1 parent 84200ac commit 17efdc9

File tree

2 files changed

+98
-33
lines changed

2 files changed

+98
-33
lines changed

src/fosslight_binary/binary_analysis.py

Lines changed: 97 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import logging
1212
import yaml
1313
import stat
14+
import re
1415
from fosslight_util.set_log import init_log
1516
import fosslight_util.constant as constant
1617
from fosslight_util.output_format import check_output_formats, write_output_file
@@ -19,13 +20,16 @@
1920
from ._jar_analysis import analyze_jar_file, merge_binary_list
2021
from fosslight_util.correct import correct_with_yaml
2122
from fosslight_util.oss_item import ScannerItem
23+
from fosslight_util.download import compression_extension
24+
from fosslight_util.write_txt import write_txt_file
2225
import hashlib
2326
import tlsh
2427
from io import open
2528

2629
PKG_NAME = "fosslight_binary"
2730
logger = logging.getLogger(constant.LOGGER_NAME)
2831

32+
_REMOVE_FILE_EXTENSION_SIMPLE = ['ttf', 'otf', 'png', 'gif', 'jpg', 'bmp', 'jpeg']
2933
_REMOVE_FILE_EXTENSION = ['qm', 'xlsx', 'pdf', 'pptx', 'jfif', 'docx', 'doc', 'whl',
3034
'xls', 'xlsm', 'ppt', 'mp4', 'pyc', 'plist', 'dat', 'json', 'js']
3135
_REMOVE_FILE_COMMAND_RESULT = [
@@ -70,8 +74,9 @@ def get_checksum_and_tlsh(bin_with_path):
7074
return checksum_value, tlsh_value, error_msg
7175

7276

73-
def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
77+
def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[], simple_mode=False):
7478
global _root_path, logger, _start_time
79+
combined_paths_and_files = []
7580

7681
_json_ext = ".json"
7782
_start_time = datetime.now().strftime('%y%m%d_%H%M')
@@ -91,16 +96,26 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
9196
else:
9297
output_path = os.path.abspath(output_path)
9398

94-
while len(output_files) < len(output_extensions):
95-
output_files.append(None)
96-
for i, output_extension in enumerate(output_extensions):
97-
if output_files[i] is None or output_files[i] == "":
98-
if output_extension == _json_ext:
99-
output_files[i] = f"fosslight_opossum_bin_{_start_time}"
100-
else:
101-
output_files[i] = f"fosslight_report_bin_{_start_time}"
99+
if simple_mode:
100+
if len(output_files) == 0:
101+
combined_paths_and_files.append(os.path.join(output_path, f"binary_list_{_start_time}"))
102+
combined_paths_and_files.append(os.path.join(output_path, f"compressed_list_{_start_time}"))
103+
else:
104+
for file in output_files:
105+
combined_paths_and_files.append(os.path.join(output_path, str(file)))
106+
combined_paths_and_files.append(os.path.join(output_path, f"{file}_compressed_list"))
107+
else:
108+
while len(output_files) < len(output_extensions):
109+
output_files.append(None)
102110

103-
combined_paths_and_files = [os.path.join(output_path, file) for file in output_files]
111+
for i, output_extension in enumerate(output_extensions):
112+
if output_files[i] is None or output_files[i] == "":
113+
if output_extension == _json_ext:
114+
output_files[i] = f"fosslight_opossum_bin_{_start_time}"
115+
else:
116+
output_files[i] = f"fosslight_report_bin_{_start_time}"
117+
118+
combined_paths_and_files = [os.path.join(output_path, file) for file in output_files]
104119
else:
105120
logger.error(f"Format error - {msg}")
106121
sys.exit(1)
@@ -116,10 +131,11 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]):
116131
return _result_log, combined_paths_and_files, output_extensions
117132

118133

119-
def get_file_list(path_to_find, abs_path_to_exclude):
134+
def get_file_list(path_to_find, abs_path_to_exclude, simple_mode=False):
120135
bin_list = []
121136
file_cnt = 0
122137
found_jar = False
138+
compressed_list = []
123139

124140
for root, dirs, files in os.walk(path_to_find):
125141
if os.path.abspath(root) in abs_path_to_exclude:
@@ -148,6 +164,9 @@ def get_file_list(path_to_find, abs_path_to_exclude):
148164
bin_item.source_name_or_path = bin_with_path.replace(
149165
_root_path, '', 1)
150166

167+
if simple_mode and f".{extension}" in compression_extension:
168+
compressed_list.append(bin_item.bin_name_with_path)
169+
151170
if any(dir_name in dir_path for dir_name in _EXCLUDE_DIR):
152171
bin_item.exclude = True
153172
elif file.lower() in _EXCLUDE_FILE:
@@ -156,20 +175,38 @@ def get_file_list(path_to_find, abs_path_to_exclude):
156175
bin_item.exclude = True
157176
bin_list.append(bin_item)
158177
file_cnt += 1
159-
return file_cnt, bin_list, found_jar
178+
return file_cnt, bin_list, found_jar, compressed_list
179+
180+
181+
# Source-archive jars: matches "source.jar" as well as the Maven-style
# "sources.jar" (e.g. "foo-1.0-sources.jar"). Paths are lower-cased before
# matching, so the pattern itself is lower-case only.
_SOURCE_JAR_PATTERN = re.compile(r"sources?\.jar")


def exclude_bin_for_simple_mode(binary_list):
    """Yield the paths of binaries that should be listed in simple mode.

    An item is skipped when its ``exclude`` flag is set or when its
    lower-cased path contains a source-jar name ("source.jar" or
    "sources.jar").

    :param binary_list: iterable of items exposing ``bin_name_with_path``
        (str) and ``exclude`` (truthy when the item must be left out).
    :return: generator of ``bin_name_with_path`` strings, in input order.
    """
    for bin_item in binary_list:
        if bin_item.exclude:
            continue
        if _SOURCE_JAR_PATTERN.search(bin_item.bin_name_with_path.lower()):
            continue
        yield bin_item.bin_name_with_path
185+
186+
187+
def log_result_msg(results: list, scan_item: ScannerItem):
    """Log the outcome of every attempt to write a result file.

    :param results: iterable of ``(success, message, result_file)`` tuples.
    :param scan_item: object whose ``get_cover_comment()`` rows are logged
        after each successful write.
    """
    for succeeded, message, output_file in results:
        if not succeeded:
            logger.error(f"Fail to generate result file.:{message}")
            continue

        # A successful write without a file name only carries a message.
        if output_file:
            logger.info(f"Output file :{output_file}")
        else:
            logger.warning(f"{message}")

        for comment_row in scan_item.get_cover_comment():
            logger.info(comment_row)
160198

161199

162200
def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=False,
163201
correct_mode=True, correct_filepath="", path_to_exclude=[]):
164202

165203
_result_log, result_reports, output_extensions = init(
166-
path_to_find_bin, output_dir, formats, path_to_exclude)
204+
path_to_find_bin, output_dir, formats, path_to_exclude, simple_mode)
167205

168206
total_bin_cnt = 0
169207
total_file_cnt = 0
170208
db_loaded_cnt = 0
171209
success_to_write = False
172-
writing_msg = ""
173210
results = []
174211
bin_list = []
175212
base_dir_name = os.path.basename(path_to_find_bin)
@@ -183,15 +220,16 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
183220
if not correct_filepath:
184221
correct_filepath = path_to_find_bin
185222
try:
186-
total_file_cnt, file_list, found_jar = get_file_list(path_to_find_bin, abs_path_to_exclude)
187-
return_list = list(return_bin_only(file_list))
223+
total_file_cnt, file_list, found_jar, compressed_list = get_file_list(path_to_find_bin, abs_path_to_exclude, simple_mode)
224+
return_list = list(return_bin_only(file_list, True, simple_mode))
188225
except Exception as ex:
189226
error_occured(error_msg=f"Failed to check whether it is binary or not : {ex}",
190227
result_log=_result_log,
191228
exit=True)
192229
total_bin_cnt = len(return_list)
193230
if simple_mode:
194-
bin_list = [bin.bin_name_with_path for bin in return_list]
231+
bin_list = list(exclude_bin_for_simple_mode(return_list))
232+
print_simple_mode(result_reports, compressed_list, bin_list)
195233
else:
196234
scan_item = ScannerItem(PKG_NAME, _start_time)
197235
scan_item.set_cover_pathinfo(path_to_find_bin, path_to_exclude)
@@ -226,17 +264,8 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
226264

227265
except Exception as ex:
228266
error_occured(error_msg=str(ex), exit=False)
229-
230-
for success_to_write, writing_msg, result_file in results:
231-
if success_to_write:
232-
if result_file:
233-
logger.info(f"Output file :{result_file}")
234-
else:
235-
logger.warning(f"{writing_msg}")
236-
for row in scan_item.get_cover_comment():
237-
logger.info(row)
238-
else:
239-
logger.error(f"Fail to generate result file.:{writing_msg}")
267+
268+
log_result_msg(results, scan_item)
240269

241270
try:
242271
print_result_log(success=True, result_log=_result_log,
@@ -249,10 +278,10 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F
249278
return success_to_write, scan_item
250279

251280

252-
def return_bin_only(file_list, need_checksum_tlsh=True):
281+
def return_bin_only(file_list, need_checksum_tlsh=True, simple_mode=False):
253282
for file_item in file_list:
254283
try:
255-
if check_binary(file_item.bin_name_with_path):
284+
if check_binary(file_item.bin_name_with_path, simple_mode):
256285
if need_checksum_tlsh:
257286
file_item.checksum, file_item.tlsh, error_msg = get_checksum_and_tlsh(file_item.bin_name_with_path)
258287
if error_msg:
@@ -264,11 +293,20 @@ def return_bin_only(file_list, need_checksum_tlsh=True):
264293
yield file_item
265294

266295

267-
def check_binary(file_with_path):
296+
def check_binary(file_with_path, simple_mode=False):
268297
is_bin_confirmed = False
269298
file = os.path.basename(file_with_path)
270299
extension = os.path.splitext(file)[1][1:]
271-
if not os.path.islink(file_with_path) and extension.lower() not in _REMOVE_FILE_EXTENSION:
300+
compres_ext_wo_dot = []
301+
302+
if simple_mode:
303+
for ext in compression_extension:
304+
compres_ext_wo_dot.append(ext.lstrip('.'))
305+
remove_file_ext_list = _REMOVE_FILE_EXTENSION + _REMOVE_FILE_EXTENSION_SIMPLE + compres_ext_wo_dot
306+
else:
307+
remove_file_ext_list = _REMOVE_FILE_EXTENSION
308+
309+
if not os.path.islink(file_with_path) and extension.lower() not in remove_file_ext_list:
272310
if stat.S_ISFIFO(os.stat(file_with_path).st_mode):
273311
return False
274312
file_command_result = ""
@@ -324,3 +362,30 @@ def print_result_log(success=True, result_log={}, file_cnt="", bin_file_cnt="",
324362
logger.info(_str_final_result_log)
325363
except Exception as ex:
326364
logger.warning(f"Error to print final log: {ex}")
365+
366+
367+
def convert_list_to_str(input_list):
    """Return the items of *input_list*, each rendered with ``str()``, joined by newlines."""
    return '\n'.join(str(item) for item in input_list)
370+
371+
372+
def print_simple_mode(result_reports, compressed_list, bin_list):
    """Write the simple-mode binary list and compressed-file list as text files.

    Each entry of ``result_reports`` is an output path without extension;
    ".txt" is appended here. An entry is treated as the compressed-list
    target when its file name starts with "compressed_list_" or ends with
    "_compressed_list"; every other entry is the binary-list target.
    When several entries fall in the same category, the last one wins.

    :param result_reports: iterable of output paths (without extension).
    :param compressed_list: paths of compressed files found; nothing is
        written when it is empty.
    :param bin_list: paths of binaries found; nothing is written when it
        is empty.
    """
    compressed_list_txt = ""
    bin_list_txt = ""

    for report in result_reports:
        # Classify on the file name only, so a directory whose name contains
        # "compressed_list" cannot turn every report into a compressed list.
        report_name = os.path.basename(report)
        if report_name.startswith("compressed_list_") or report_name.endswith("_compressed_list"):
            compressed_list_txt = f"{report}.txt"
        else:
            bin_list_txt = f"{report}.txt"

    if compressed_list and compressed_list_txt:
        success, error = write_txt_file(compressed_list_txt, convert_list_to_str(compressed_list))
        if not success:
            logger.error(f"Error to write compressed list file for simple mode : {error}")
    if bin_list and bin_list_txt:
        success, error = write_txt_file(bin_list_txt, convert_list_to_str(bin_list))
        if not success:
            logger.error(f"Error to write binary list file for simple mode : {error}")

src/fosslight_binary/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def main():
4949
print_package_version(_PKG_NAME, "FOSSLight Binary Scanner Version:")
5050
sys.exit(0)
5151

52-
if args.simple:
52+
if args.simple: # -s option
5353
simple_mode = True
5454

5555
if args.path: # -p option

0 commit comments

Comments
 (0)