1919_exclude_directory = [os .path .sep + dir_name +
2020 os .path .sep for dir_name in _exclude_directory ]
2121_exclude_directory .append ("/." )
22- remove_license = ["warranty-disclaimer" ]
23-
22+ REMOVE_LICENSE = ["warranty-disclaimer" ]
23+ regex = re .compile (r'licenseref-(\S+)' , re .IGNORECASE )
24+ find_word = re .compile (rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)" , re .IGNORECASE )
25+ KEYWORD_SPDX_ID = r'SPDX-License-Identifier\s*[\S]+'
26+ KEYWORD_DOWNLOAD_LOC = r'DownloadLocation\s*[\S]+'
27+ KEYWORD_SCANCODE_UNKNOWN = "unknown-spdx"
28+ SPDX_REPLACE_WORDS = ["(" , ")" ]
29+ KEY_AND = "and"
30+ KEY_OR = "or"
2431
2532def get_error_from_header (header_item ):
2633 has_error = False
@@ -41,17 +48,13 @@ def get_error_from_header(header_item):
4148 return has_error , str_error
4249
4350
44- def parsing_file_item (scancode_file_list , has_error , path_to_scan , need_matched_license = False ):
45-
51+ def parsing_scancode_32_earlier (scancode_file_list , path_to_scan , has_error = False ):
4652 rc = True
53+ msg = []
4754 scancode_file_item = []
4855 license_list = {} # Key :[license]+[matched_text], value: MatchedLicense()
49- msg = []
50-
5156 prev_dir = ""
5257 prev_dir_value = False
53- regex = re .compile (r'licenseref-(\S+)' , re .IGNORECASE )
54- find_word = re .compile (rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)" , re .IGNORECASE )
5558
5659 if scancode_file_list :
5760 for file in scancode_file_list :
@@ -100,8 +103,8 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_
100103 copyright_data = x .get ("value" , "" )
101104 if copyright_data :
102105 try :
103- copyright_data = re .sub (r'SPDX-License-Identifier\s*[\S]+' , '' , copyright_data , flags = re .I )
104- copyright_data = re .sub (r'DownloadLocation\s*[\S]+' , '' , copyright_data , flags = re .I ).strip ()
106+ copyright_data = re .sub (KEYWORD_SPDX_ID , '' , copyright_data , flags = re .I )
107+ copyright_data = re .sub (KEYWORD_DOWNLOAD_LOC , '' , copyright_data , flags = re .I ).strip ()
105108 except Exception :
106109 pass
107110 copyright_value_list .append (copyright_data )
@@ -122,7 +125,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_
122125 for lic_item in licenses :
123126 license_value = ""
124127 key = lic_item .get ("key" , "" )
125- if key in remove_license :
128+ if key in REMOVE_LICENSE :
126129 if key in license_expression_list :
127130 license_expression_list .remove (key )
128131 continue
@@ -139,7 +142,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_
139142 license_value = spdx .lower ()
140143
141144 if license_value != "" :
142- if key == "unknown-spdx" :
145+ if key == KEYWORD_SCANCODE_UNKNOWN :
143146 try :
144147 matched_txt = lic_item .get ("matched_text" , "" )
145148 matched = regex .search (matched_txt )
@@ -154,7 +157,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_
154157 license_detected .append (license_value )
155158
156159 # Add matched licenses
157- if need_matched_license and "category" in lic_item :
160+ if "category" in lic_item :
158161 lic_category = lic_item ["category" ]
159162 if "matched_text" in lic_item :
160163 lic_matched_text = lic_item ["matched_text" ]
@@ -184,3 +187,122 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_
184187 rc = False
185188 msg = list (set (msg ))
186189 return rc , scancode_file_item , msg , license_list
190+
191+
def split_spdx_expression(spdx_string):
    """Split a license expression into the license keys it combines.

    Only ``and`` / ``or`` that stand alone as whitespace-delimited operators
    separate licenses. Keys that merely contain those letters (for example
    ``bsd-original``, ``standard-ml-nj`` or ``gpl-2.0-or-later``) are kept
    intact. Parentheses are treated as plain separators; ``with`` clauses are
    left attached to their license.

    :param spdx_string: license expression, e.g. ``"(mit or bsd-new) and zlib"``.
    :return: list of stripped, non-empty license keys in order of appearance.
    """
    if not spdx_string:
        return []

    # Parentheses only group sub-expressions; turning them into blanks keeps
    # an operator glued to a bracket ("(a)or(b)") recognisable as an operator.
    flattened = re.sub(r"[()]", " ", spdx_string)
    pieces = re.split(r"\s+(?:and|or)\s+", flattened, flags=re.IGNORECASE)
    return [piece.strip() for piece in pieces if piece.strip()]
199+
200+
def _read_download_locations(file_abs_path):
    """Return the SPDX-PackageDownloadLocation values found in a file.

    :raises OSError: the file cannot be opened.
    :raises ValueError: the file cannot be memory-mapped (e.g. it is empty)
        or a matched value is not valid UTF-8.
    """
    # Binary mode: the content is only ever read as raw bytes through mmap.
    with open(file_abs_path, "rb") as f:
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
            return [word.decode('utf-8') for word in find_word.findall(mmap_obj)]


def _strip_spdx_tags(copyright_data):
    """Remove SPDX-License-Identifier / DownloadLocation tags from a copyright."""
    try:
        copyright_data = re.sub(KEYWORD_SPDX_ID, '', copyright_data, flags=re.I)
        copyright_data = re.sub(KEYWORD_DOWNLOAD_LOC, '', copyright_data, flags=re.I).strip()
    except TypeError:
        # Best effort: a value that is not a string is reported unchanged.
        pass
    return copyright_data


def parsing_scancode_32_later(scancode_file_list, path_to_scan, has_error=False):
    """Convert file entries that use the ``license_detections`` layout.

    Entries without a path, binary files and directories are skipped. Each
    remaining entry becomes a ``ScanItem`` carrying its download locations,
    copyrights and detected licenses; every (license, matched text) pair is
    also registered in the returned ``license_list``.

    :param scancode_file_list: list of per-file dicts from the scan result;
        ``None`` or an empty list yields an empty result.
    :param path_to_scan: root directory that the entries' ``path`` values are
        joined to when a file has to be re-read for its download location.
    :param has_error: when true, an entry with ``scan_errors`` is reported
        with those errors as its comment and not analysed any further.
    :return: tuple ``(rc, scancode_file_item, msg, license_list)``. ``rc`` is
        False and ``msg`` holds one message per failure when the download
        location of a file could not be read; parsing continues regardless.
    """
    rc = True
    msg = []
    scancode_file_item = []
    license_list = {}  # Key :[license]+[matched_text], value: MatchedLicense()

    for file in scancode_file_list or []:
        file_path = file.get("path", "")
        is_binary = file.get("is_binary", False)
        is_dir = file.get("type", "") == "directory"
        if (not file_path) or is_binary or is_dir:
            continue

        result_item = ScanItem(file_path)

        if has_error:
            error_msg = file.get("scan_errors", [])
            if error_msg:
                result_item.comment = ",".join(error_msg)
                scancode_file_item.append(result_item)
                continue

        url_list = []
        if file.get("urls", []):
            try:
                url_list = _read_download_locations(os.path.join(path_to_scan, file_path))
            except (OSError, ValueError) as ex:
                # One unreadable file must not abort the whole result set:
                # report it and carry on without a download location.
                msg.append("Error reading download location of {}: {}".format(file_path, ex))
                rc = False
        result_item.download_location = url_list

        copyright_value_list = []
        for x in file.get("copyrights", []) or []:
            copyright_data = x.get("copyright", "")
            if copyright_data:
                copyright_value_list.append(_strip_spdx_tags(copyright_data))
        result_item.copyright = copyright_value_list

        licenses = file.get("license_detections", [])
        if not licenses:
            # NOTE(review): a file without license detections is dropped here
            # even if copyrights or download locations were found - confirm
            # that this is intended.
            continue

        license_detected = []
        for lic in licenses:
            for matched_lic in lic.get("matches", []) or []:
                found_lic_list = matched_lic.get("license_expression", "")
                if not found_lic_list:
                    continue
                matched_txt = matched_lic.get("matched_text", "")

                for found_lic in split_spdx_expression(found_lic_list.lower()):
                    found_lic = found_lic.strip()
                    if not found_lic or found_lic in REMOVE_LICENSE:
                        continue

                    if found_lic == KEYWORD_SCANCODE_UNKNOWN:
                        # Recover the actual "licenseref-..." name from the
                        # matched text when the key itself is the placeholder.
                        try:
                            matched = regex.search(matched_txt.lower())
                        except (AttributeError, TypeError):
                            matched = None  # matched_text is missing or not text
                        if matched:
                            found_lic = str(matched.group())

                    for word in replace_word:
                        found_lic = found_lic.replace(word, "")

                    if matched_txt:
                        lic_matched_key = found_lic + matched_txt
                        if lic_matched_key in license_list:
                            license_list[lic_matched_key].set_files(file_path)
                        else:
                            lic_info = MatchedLicense(found_lic, "", matched_txt, file_path)
                            license_list[lic_matched_key] = lic_info
                    license_detected.append(found_lic)

        result_item.licenses = license_detected
        if len(license_detected) > 1:
            # Several licenses: keep the full expression (SPDX form preferred)
            # as a comment so the and/or relation between them is not lost.
            license_expression = (file.get("detected_license_expression_spdx", "")
                                  or file.get("detected_license_expression", ""))
            if license_expression:
                result_item.comment = license_expression

        result_item.exclude = is_exclude_file(file_path)
        # "or 0" keeps the comparison valid when the field is present but null.
        result_item.is_license_text = (file.get("percentage_of_license_text", 0) or 0) > 90
        scancode_file_item.append(result_item)

    return rc, scancode_file_item, msg, license_list
291+
292+
def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_license=False):
    """Parse scan result entries with the parser matching their layout.

    The layout is detected from the first entry: when it has a ``licenses``
    key the list is handed to ``parsing_scancode_32_earlier``, otherwise to
    ``parsing_scancode_32_later``.

    :param scancode_file_list: list of per-file dicts from the scan result;
        ``None`` or an empty list yields an empty, successful result.
    :param has_error: forwarded to the selected parser.
    :param path_to_scan: forwarded to the selected parser.
    :param need_matched_license: when false, the matched-license dictionary
        is discarded and an empty dict is returned in its place.
    :return: tuple ``(rc, scancode_file_item, msg, license_list)``.
    """
    first_item = next(iter(scancode_file_list or []), None)
    if first_item is None:
        # Nothing to parse; without this guard the membership test below
        # would raise TypeError on None.
        return True, [], [], {}

    if "licenses" in first_item:
        parser = parsing_scancode_32_earlier
    else:
        parser = parsing_scancode_32_later

    rc, scancode_file_item, msg, license_list = parser(scancode_file_list, path_to_scan, has_error)
    if not need_matched_license:
        license_list = {}
    return rc, scancode_file_item, msg, license_list
308+
0 commit comments