diff --git a/requirements.txt b/requirements.txt index 56d94c0..290a4db 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,4 +6,6 @@ PyYAML wheel>=0.38.1 intbitset fosslight_binary>=5.0.0 -scancode-toolkit>=32.0.2 \ No newline at end of file +scancode-toolkit>=32.0.2 +fingerprints==1.2.3 +normality==2.6.1 \ No newline at end of file diff --git a/src/fosslight_source/_parsing_scancode_file_item.py b/src/fosslight_source/_parsing_scancode_file_item.py index 1724be3..6d01686 100755 --- a/src/fosslight_source/_parsing_scancode_file_item.py +++ b/src/fosslight_source/_parsing_scancode_file_item.py @@ -14,6 +14,7 @@ from ._scan_item import replace_word from ._scan_item import is_notice_file from ._scan_item import is_manifest_file +from ._scan_item import is_package_dir from typing import Tuple logger = logging.getLogger(constant.LOGGER_NAME) @@ -99,6 +100,13 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals copyright_list = file.get("copyrights", []) result_item = SourceItem(file_path) + is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path)) + if is_pkg: + result_item.source_name_or_path = pkg_path + if not any(x.source_name_or_path == result_item.source_name_or_path for x in scancode_file_item): + result_item.exclude = True + scancode_file_item.append(result_item) + continue if has_error and "scan_errors" in file: error_msg = file.get("scan_errors", []) @@ -235,6 +243,13 @@ def parsing_scancode_32_later( continue result_item = SourceItem(file_path) + is_pkg, pkg_path = is_package_dir(os.path.dirname(file_path)) + if is_pkg: + result_item.source_name_or_path = pkg_path + if not any(x.source_name_or_path == result_item.source_name_or_path for x in scancode_file_item): + result_item.exclude = True + scancode_file_item.append(result_item) + continue if has_error: error_msg = file.get("scan_errors", []) diff --git a/src/fosslight_source/_parsing_scanoss_file.py b/src/fosslight_source/_parsing_scanoss_file.py index 
_package_directory = ["node_modules", "venv", "Pods", "Carthage"]


def is_package_dir(dir_path: str) -> tuple:
    """Report whether ``dir_path`` lies inside a known package directory.

    A path qualifies when one of its components is exactly equal to a name
    listed in ``_package_directory``; partial matches such as ``my_venv`` do
    not count.

    :param dir_path: directory path; it is split on ``os.path.sep``.
    :return: a ``(is_pkg, pkg_path)`` tuple. When a component matches,
        ``is_pkg`` is ``True`` and ``pkg_path`` is ``dir_path`` truncated right
        after the outermost matching component. Otherwise the result is
        ``(False, "")``.
    """
    # NOTE(review): the split relies on os.path.sep. If callers pass paths
    # that always use '/', this would not match on Windows - confirm.
    path_parts = dir_path.split(os.path.sep)
    # Walk the components left to right so the outermost package directory
    # wins regardless of the order of names in _package_directory. All files
    # below one package tree then map to the same pkg_path.
    for idx, part in enumerate(path_parts):
        if part in _package_directory:
            return True, os.path.sep.join(path_parts[:idx + 1])
    return False, ""