1313from ._scan_item import is_exclude_file
1414from ._scan_item import replace_word
1515from ._scan_item import is_notice_file
16+ from ._scan_item import is_manifest_file
17+ from ._scan_item import is_package_dir
1618from typing import Tuple
1719
1820logger = logging .getLogger (constant .LOGGER_NAME )
2931SPDX_REPLACE_WORDS = ["(" , ")" ]
3032KEY_AND = r"(?<=\s)and(?=\s)"
3133KEY_OR = r"(?<=\s)or(?=\s)"
GPL_LICENSE_PATTERN = r'((a|l)?gpl|gfdl)'  # GPL, LGPL, AGPL, GFDL
# Compiled once at import time; it is applied to lower-cased license names.
_GPL_LICENSE_RE = re.compile(GPL_LICENSE_PATTERN)


def is_gpl_family_license(licenses: list) -> bool:
    """Return True if any entry in *licenses* names a GPL-family license.

    An entry counts as GPL family when its lower-cased text contains a match
    for ``GPL_LICENSE_PATTERN`` anywhere in the name (substring search, not a
    full match).

    Args:
        licenses: License names to inspect. ``None`` or an empty list yields
            ``False``.

    Returns:
        ``True`` as soon as one matching name is found, otherwise ``False``.
    """
    if not licenses:
        return False

    for license_name in licenses:
        # Skip empty entries and anything that is not a string: calling
        # ``.lower()`` on e.g. an int or dict would raise AttributeError and
        # abort the check for the whole file.
        if not license_name or not isinstance(license_name, str):
            continue

        if _GPL_LICENSE_RE.search(license_name.lower()):
            logger.debug(f"GPL family license detected: {license_name}")
            return True

    return False
51+
52+
def should_remove_copyright_for_gpl_license_text(licenses: list, is_license_text: bool) -> bool:
    """Decide whether copyright entries should be dropped for a file.

    Args:
        licenses: License names detected for the file.
        is_license_text: Whether the file was classified as license text.

    Returns:
        ``True`` only when the file is license text and at least one detected
        license belongs to the GPL family; ``False`` otherwise.
    """
    # Guard first so a falsy non-bool flag (None, 0, "") yields a real False
    # instead of being returned unchanged by a bare ``and`` expression.
    if not is_license_text:
        return False
    return is_gpl_family_license(licenses)
3255
3356
3457def get_error_from_header (header_item : list ) -> Tuple [bool , str ]:
@@ -77,6 +100,13 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
77100 copyright_list = file .get ("copyrights" , [])
78101
79102 result_item = SourceItem (file_path )
103+ is_pkg , pkg_path = is_package_dir (os .path .dirname (file_path ))
104+ if is_pkg :
105+ result_item .source_name_or_path = pkg_path
106+ if not any (x .source_name_or_path == result_item .source_name_or_path for x in scancode_file_item ):
107+ result_item .exclude = True
108+ scancode_file_item .append (result_item )
109+ continue
80110
81111 if has_error and "scan_errors" in file :
82112 error_msg = file .get ("scan_errors" , [])
@@ -99,8 +129,6 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
99129 pass
100130 copyright_value_list .append (copyright_data )
101131
102- result_item .copyright = copyright_value_list
103-
104132 # Set the license value
105133 license_detected = []
106134 if licenses is None or licenses == "" :
@@ -164,6 +192,16 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
164192 if len (license_detected ) > 0 :
165193 result_item .licenses = license_detected
166194
195+ if is_manifest_file (file_path ):
196+ result_item .is_manifest_file = True
197+
198+ # Remove copyright info for license text file of GPL family
199+ if should_remove_copyright_for_gpl_license_text (license_detected , result_item .is_license_text ):
200+ logger .debug (f"Removing copyright for GPL family license text file: { file_path } " )
201+ result_item .copyright = []
202+ else :
203+ result_item .copyright = copyright_value_list
204+
167205 if len (license_expression_list ) > 0 :
168206 license_expression_list = list (
169207 set (license_expression_list ))
@@ -205,6 +243,13 @@ def parsing_scancode_32_later(
205243 continue
206244
207245 result_item = SourceItem (file_path )
246+ is_pkg , pkg_path = is_package_dir (os .path .dirname (file_path ))
247+ if is_pkg :
248+ result_item .source_name_or_path = pkg_path
249+ if not any (x .source_name_or_path == result_item .source_name_or_path for x in scancode_file_item ):
250+ result_item .exclude = True
251+ scancode_file_item .append (result_item )
252+ continue
208253
209254 if has_error :
210255 error_msg = file .get ("scan_errors" , [])
@@ -223,7 +268,6 @@ def parsing_scancode_32_later(
223268 except Exception :
224269 pass
225270 copyright_value_list .append (copyright_data )
226- result_item .copyright = copyright_value_list
227271
228272 license_detected = []
229273 licenses = file .get ("license_detections" , [])
@@ -259,6 +303,20 @@ def parsing_scancode_32_later(
259303 license_list [lic_matched_key ] = lic_info
260304 license_detected .append (found_lic )
261305 result_item .licenses = license_detected
306+
307+ result_item .exclude = is_exclude_file (file_path )
308+ result_item .is_license_text = file .get ("percentage_of_license_text" , 0 ) > 90 or is_notice_file (file_path )
309+
310+ if is_manifest_file (file_path ) and len (license_detected ) > 0 :
311+ result_item .is_manifest_file = True
312+
313+ # Remove copyright info for license text file of GPL family
314+ if should_remove_copyright_for_gpl_license_text (license_detected , result_item .is_license_text ):
315+ logger .debug (f"Removing copyright for GPL family license text file: { file_path } " )
316+ result_item .copyright = []
317+ else :
318+ result_item .copyright = copyright_value_list
319+
262320 if len (license_detected ) > 1 :
263321 license_expression_spdx = file .get ("detected_license_expression_spdx" , "" )
264322 license_expression = file .get ("detected_license_expression" , "" )
@@ -267,8 +325,6 @@ def parsing_scancode_32_later(
267325 if license_expression :
268326 result_item .comment = license_expression
269327
270- result_item .exclude = is_exclude_file (file_path )
271- result_item .is_license_text = file .get ("percentage_of_license_text" , 0 ) > 90 or is_notice_file (file_path )
272328 scancode_file_item .append (result_item )
273329 except Exception as ex :
274330 msg .append (f"Error Parsing item: { ex } " )
0 commit comments