From 3371f34fb1b62d1fbc79a38bac6e19fbb812c247 Mon Sep 17 00:00:00 2001 From: Jaekwon Bang Date: Tue, 8 Oct 2024 17:29:13 +0900 Subject: [PATCH 1/2] Apply simple mode --- src/fosslight_binary/_simple_mode.py | 118 ++++++++++++++++++++++++ src/fosslight_binary/binary_analysis.py | 79 +++++++++------- src/fosslight_binary/cli.py | 2 +- 3 files changed, 166 insertions(+), 33 deletions(-) create mode 100644 src/fosslight_binary/_simple_mode.py diff --git a/src/fosslight_binary/_simple_mode.py b/src/fosslight_binary/_simple_mode.py new file mode 100644 index 0000000..5426ef3 --- /dev/null +++ b/src/fosslight_binary/_simple_mode.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# FOSSLight Binary analysis script +# Copyright (c) 2024 LG Electronics Inc. +# SPDX-License-Identifier: Apache-2.0 +import os +import re +import logging +import zipfile +import tarfile +import fosslight_util.constant as constant +from fosslight_util.write_txt import write_txt_file +from fosslight_util.set_log import init_log + +REMOVE_FILE_EXTENSION_SIMPLE = ['ttf', 'otf', 'png', 'gif', 'jpg', 'bmp', 'jpeg'] +logger = logging.getLogger(constant.LOGGER_NAME) + + +def is_compressed_file(filename): + if filename.lower().endswith('.jar'): + return False + return zipfile.is_zipfile(filename) or tarfile.is_tarfile(filename) + + +def exclude_bin_for_simple_mode(binary_list): + bin_list = [] + compressed_list = [] + + for bin in binary_list: + file_lower_case = bin.bin_name_with_path.lower() + extension = os.path.splitext(file_lower_case)[1][1:].strip() + + if is_compressed_file(bin.bin_name_with_path): + compressed_list.append(bin.bin_name_with_path) + continue + + remove_file_ext_list = REMOVE_FILE_EXTENSION_SIMPLE + if any(extension == remove_ext for remove_ext in remove_file_ext_list): + continue + if re.search(r".*sources\.jar", bin.bin_name_with_path.lower()) or bin.exclude: + continue + + bin_list.append(bin.bin_name_with_path) + return compressed_list, bin_list + + +def 
convert_list_to_str(input_list): + output_text = '\n'.join(map(str, input_list)) + return output_text + + +def check_output_path(output, start_time): + compressed_list_txt = "" + simple_bin_list_txt = "" + output_path = "" + + if output != "": + if not os.path.isdir(output) and output.endswith('.txt'): + output_path = os.path.dirname(output) + basename = os.path.basename(output) + basename_file, _ = os.path.splitext(basename) + compressed_list_txt = f"{basename_file}_compressed_list.txt" + simple_bin_list_txt = f"{basename_file}.txt" + else: + output_path = output + compressed_list_txt = f"compressed_list_{start_time}.txt" + simple_bin_list_txt = f"binary_list_{start_time}.txt" + else: + compressed_list_txt = f"compressed_list_{start_time}.txt" + simple_bin_list_txt = f"binary_list_{start_time}.txt" + + if output_path == "": + output_path = os.getcwd() + else: + output_path = os.path.abspath(output_path) + + compressed_list_txt = os.path.join(output_path, compressed_list_txt) + simple_bin_list_txt = os.path.join(output_path, simple_bin_list_txt) + + return output_path, compressed_list_txt, simple_bin_list_txt + + +def init_simple(output_file_name, pkg_name, start_time): + global logger, _result_log + + output_path, compressed_list_txt, simple_bin_list_txt = check_output_path(output_file_name, start_time) + + log_file = os.path.join(output_path, f"fosslight_log_bin_{start_time}.txt") + logger, _result_log = init_log(log_file, False, logging.INFO, logging.DEBUG, pkg_name) + + return _result_log, compressed_list_txt, simple_bin_list_txt + + +def print_simple_mode(compressed_list_txt, simple_bin_list_txt, compressed_list, bin_list): + results = [] + success = True + msg = "" + output_file = "" + if compressed_list: + success, error = write_txt_file(compressed_list_txt, convert_list_to_str(compressed_list)) + if success: + output_file = compressed_list_txt + else: + msg = f"Error to write compressed list file for simple mode : {error}" + results.append(tuple([success, 
msg, output_file])) + if bin_list: + success, error = write_txt_file(simple_bin_list_txt, convert_list_to_str(bin_list)) + if success: + output_file = simple_bin_list_txt + else: + msg = f"Error to write binary list file for simple mode : {error}" + results.append(tuple([success, msg, output_file])) + return results + + +def filter_binary(bin_list): + compressed_list, bin_list = exclude_bin_for_simple_mode(bin_list) + return compressed_list, bin_list diff --git a/src/fosslight_binary/binary_analysis.py b/src/fosslight_binary/binary_analysis.py index d67a691..601bd98 100755 --- a/src/fosslight_binary/binary_analysis.py +++ b/src/fosslight_binary/binary_analysis.py @@ -18,6 +18,7 @@ from ._binary_dao import get_oss_info_from_db from ._binary import BinaryItem, TLSH_CHECKSUM_NULL from ._jar_analysis import analyze_jar_file, merge_binary_list +from ._simple_mode import print_simple_mode, filter_binary, init_simple from fosslight_util.correct import correct_with_yaml from fosslight_util.oss_item import ScannerItem import hashlib @@ -41,7 +42,7 @@ _REMOVE_DIR = [os.path.sep + dir_name + os.path.sep for dir_name in _REMOVE_DIR] _error_logs = [] _root_path = "" -_start_time = "" +start_time = "" windows = False BYTES = 2048 BIN_EXT_HEADER = {'BIN_FL_Binary': ['ID', 'Binary Path', 'OSS Name', @@ -72,18 +73,9 @@ def get_checksum_and_tlsh(bin_with_path): def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]): - global _root_path, logger, _start_time + global logger, _result_log _json_ext = ".json" - _start_time = datetime.now().strftime('%y%m%d_%H%M') - _result_log = { - "Tool Info": PKG_NAME - } - - _root_path = path_to_find_bin - if not path_to_find_bin.endswith(os.path.sep): - _root_path += os.path.sep - success, msg, output_path, output_files, output_extensions, formats = check_output_formats_v2(output_file_name, formats) if success: @@ -100,20 +92,20 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]): if formats: if 
formats[i].startswith('spdx'): if platform.system() != 'Windows': - output_files[i] = f"fosslight_spdx_bin_{_start_time}" + output_files[i] = f"fosslight_spdx_bin_{start_time}" else: logger.warning('spdx format is not supported on Windows. Please remove spdx from format.') to_remove.append(i) else: if output_extension == _json_ext: - output_files[i] = f"fosslight_opossum_bin_{_start_time}" + output_files[i] = f"fosslight_opossum_bin_{start_time}" else: - output_files[i] = f"fosslight_report_bin_{_start_time}" + output_files[i] = f"fosslight_report_bin_{start_time}" else: if output_extension == _json_ext: - output_files[i] = f"fosslight_opossum_bin_{_start_time}" + output_files[i] = f"fosslight_opossum_bin_{start_time}" else: - output_files[i] = f"fosslight_report_bin_{_start_time}" + output_files[i] = f"fosslight_report_bin_{start_time}" for index in sorted(to_remove, reverse=True): # remove elements of spdx format on windows del output_files[index] @@ -127,7 +119,7 @@ def init(path_to_find_bin, output_file_name, formats, path_to_exclude=[]): logger.error(f"Format error - {msg}") sys.exit(1) - log_file = os.path.join(output_path, f"fosslight_log_bin_{_start_time}.txt") + log_file = os.path.join(output_path, f"fosslight_log_bin_{start_time}.txt") logger, _result_log = init_log(log_file, True, logging.INFO, logging.DEBUG, PKG_NAME, path_to_find_bin, path_to_exclude) @@ -183,9 +175,21 @@ def get_file_list(path_to_find, abs_path_to_exclude): def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=False, correct_mode=True, correct_filepath="", path_to_exclude=[]): + global start_time, _root_path, _result_log + + mode = "Normal Mode" + start_time = datetime.now().strftime('%y%m%d_%H%M') + + _root_path = path_to_find_bin + if not path_to_find_bin.endswith(os.path.sep): + _root_path += os.path.sep - _result_log, result_reports, output_extensions = init( - path_to_find_bin, output_dir, formats, path_to_exclude) + if simple_mode: + mode = "Simple Mode" 
+ _result_log, compressed_list_txt, simple_bin_list_txt = init_simple(output_dir, PKG_NAME, start_time) + else: + _result_log, result_reports, output_extensions = init( + path_to_find_bin, output_dir, formats, path_to_exclude) total_bin_cnt = 0 total_file_cnt = 0 @@ -201,7 +205,8 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F if not os.path.isdir(path_to_find_bin): error_occured(error_msg=f"Can't find the directory : {path_to_find_bin}", result_log=_result_log, - exit=True) + exit=True, + mode=mode) if not correct_filepath: correct_filepath = path_to_find_bin try: @@ -210,12 +215,21 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F except Exception as ex: error_occured(error_msg=f"Failed to check whether it is binary or not : {ex}", result_log=_result_log, - exit=True) - total_bin_cnt = len(return_list) + exit=True, + mode=mode) if simple_mode: - bin_list = [bin.bin_name_with_path for bin in return_list] + try: + compressed_list, filtered_bin_list = filter_binary(return_list) + results = print_simple_mode(compressed_list_txt, simple_bin_list_txt, compressed_list, filtered_bin_list) + total_bin_cnt = len(filtered_bin_list) + except Exception as ex: + error_occured(error_msg=f"Failed to run simple mode: {ex}", + result_log=_result_log, + exit=True, + mode="Simple mode") else: - scan_item = ScannerItem(PKG_NAME, _start_time) + total_bin_cnt = len(return_list) + scan_item = ScannerItem(PKG_NAME, start_time) scan_item.set_cover_pathinfo(path_to_find_bin, path_to_exclude) try: # Run OWASP Dependency-check @@ -262,7 +276,7 @@ def find_binaries(path_to_find_bin, output_dir, formats, dburl="", simple_mode=F logger.error(f"Fail to generate result file.:{writing_msg}") try: - print_result_log(success=True, result_log=_result_log, + print_result_log(mode=mode, success=True, result_log=_result_log, file_cnt=str(total_file_cnt), bin_file_cnt=str(total_bin_cnt), auto_bin_cnt=str(db_loaded_cnt), 
bin_list=bin_list) @@ -279,7 +293,7 @@ def return_bin_only(file_list, need_checksum_tlsh=True): if need_checksum_tlsh: file_item.checksum, file_item.tlsh, error_msg = get_checksum_and_tlsh(file_item.bin_name_with_path) if error_msg: - error_occured(error_msg=error_msg, exit=False) + error_occured(error_msg=error_msg, exit=False) yield file_item except Exception as ex: logger.debug(f"Exception in get_file_list: {ex}") @@ -317,21 +331,22 @@ def check_binary(file_with_path): return is_bin_confirmed -def error_occured(error_msg, exit=False, result_log={}): +def error_occured(error_msg, exit=False, result_log={}, mode="Normal mode"): global _error_logs _error_logs.append(error_msg) if exit: - print_result_log(success=False, result_log=result_log) + print_result_log(mode, success=False, result_log=result_log) sys.exit() -def print_result_log(success=True, result_log={}, file_cnt="", bin_file_cnt="", auto_bin_cnt="", bin_list=[]): +def print_result_log(mode="Normal Mode", success=True, result_log={}, file_cnt="", bin_file_cnt="", auto_bin_cnt="", bin_list=[]): if "Running time" in result_log: - start_time = result_log["Running time"] + starttime = result_log["Running time"] else: - start_time = _start_time - result_log["Running time"] = start_time + " ~ " + \ + starttime = start_time + result_log["Mode"] = mode + result_log["Running time"] = starttime + " ~ " + \ datetime.now().strftime('%Y%m%d_%H%M%S') result_log["Execution result"] = 'Success' if success else 'Error occurred' result_log["Binaries / Scanned files"] = f"{bin_file_cnt}/{file_cnt}" diff --git a/src/fosslight_binary/cli.py b/src/fosslight_binary/cli.py index 95eb6e4..8d13be0 100644 --- a/src/fosslight_binary/cli.py +++ b/src/fosslight_binary/cli.py @@ -49,7 +49,7 @@ def main(): print_package_version(_PKG_NAME, "FOSSLight Binary Scanner Version:") sys.exit(0) - if args.simple: + if args.simple: # -s option simple_mode = True if args.path: # -p option From 1b52f8275a6569e5a7fffdb749d4374d8238b2c0 Mon Sep 
17 00:00:00 2001 From: Jaekwon Bang Date: Fri, 11 Oct 2024 13:33:57 +0900 Subject: [PATCH 2/2] Exclude file beginning with . --- src/fosslight_binary/binary_analysis.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/fosslight_binary/binary_analysis.py b/src/fosslight_binary/binary_analysis.py index 601bd98..596e27c 100755 --- a/src/fosslight_binary/binary_analysis.py +++ b/src/fosslight_binary/binary_analysis.py @@ -168,6 +168,8 @@ def get_file_list(path_to_find, abs_path_to_exclude): bin_item.exclude = True elif extension in _EXCLUDE_FILE_EXTENSION: bin_item.exclude = True + elif file.startswith('.'): + bin_item.exclude = True bin_list.append(bin_item) file_cnt += 1 return file_cnt, bin_list, found_jar