# Rewrite every excluded directory name as "<sep>name<sep>" using the
# platform path separator.
# Presumably this lets the entries be matched as whole path components rather
# than as substrings of longer names -- confirm against is_exclude_file().
_exclude_directory = [os.path.sep + dir_name +
                      os.path.sep for dir_name in _exclude_directory]
# NOTE(review): this entry hard-codes "/" while the entries above use
# os.path.sep; presumably it targets hidden (dot-prefixed) path components --
# verify the behaviour on platforms whose separator is not "/".
_exclude_directory.append("/.")
22- remove_license = ["warranty-disclaimer" ]
# License keys that the parsers below skip instead of reporting.
REMOVE_LICENSE = ["warranty-disclaimer"]
# Case-insensitive pattern for a "licenseref-<id>" token; the parsers search
# the matched text with it when the detected key is KEYWORD_SCANCODE_UNKNOWN.
regex = re.compile(r'licenseref-(\S+)', re.IGNORECASE)
# Bytes pattern capturing the value that follows "SPDX-PackageDownloadLocation:";
# it is a bytes pattern because it is applied to a memory-mapped file.
find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
# Patterns removed from copyright strings (applied with re.I by the parsers).
KEYWORD_SPDX_ID = r'SPDX-License-Identifier\s*[\S]+'
KEYWORD_DOWNLOAD_LOC = r'DownloadLocation\s*[\S]+'
# License key that triggers the "licenseref-" lookup in the matched text.
KEYWORD_SCANCODE_UNKNOWN = "unknown-spdx"
# Substrings stripped from a license expression before it is split.
SPDX_REPLACE_WORDS = ["(", ")"]
# Operator keywords used when splitting a license expression.
KEY_AND = "and"
KEY_OR = "or"
2331
2432
2533def get_error_from_header (header_item ):
@@ -41,17 +49,13 @@ def get_error_from_header(header_item):
4149 return has_error , str_error
4250
4351
44- def parsing_file_item (scancode_file_list , has_error , path_to_scan , need_matched_license = False ):
45-
52+ def parsing_scancode_32_earlier (scancode_file_list , path_to_scan , has_error = False ):
4653 rc = True
54+ msg = []
4755 scancode_file_item = []
4856 license_list = {} # Key :[license]+[matched_text], value: MatchedLicense()
49- msg = []
50-
5157 prev_dir = ""
5258 prev_dir_value = False
53- regex = re .compile (r'licenseref-(\S+)' , re .IGNORECASE )
54- find_word = re .compile (rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)" , re .IGNORECASE )
5559
5660 if scancode_file_list :
5761 for file in scancode_file_list :
@@ -100,8 +104,8 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_
100104 copyright_data = x .get ("value" , "" )
101105 if copyright_data :
102106 try :
103- copyright_data = re .sub (r'SPDX-License-Identifier\s*[\S]+' , '' , copyright_data , flags = re .I )
104- copyright_data = re .sub (r'DownloadLocation\s*[\S]+' , '' , copyright_data , flags = re .I ).strip ()
107+ copyright_data = re .sub (KEYWORD_SPDX_ID , '' , copyright_data , flags = re .I )
108+ copyright_data = re .sub (KEYWORD_DOWNLOAD_LOC , '' , copyright_data , flags = re .I ).strip ()
105109 except Exception :
106110 pass
107111 copyright_value_list .append (copyright_data )
@@ -122,7 +126,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_
122126 for lic_item in licenses :
123127 license_value = ""
124128 key = lic_item .get ("key" , "" )
125- if key in remove_license :
129+ if key in REMOVE_LICENSE :
126130 if key in license_expression_list :
127131 license_expression_list .remove (key )
128132 continue
@@ -139,9 +143,9 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_
139143 license_value = spdx .lower ()
140144
141145 if license_value != "" :
142- if key == "unknown-spdx" :
146+ if key == KEYWORD_SCANCODE_UNKNOWN :
143147 try :
144- matched_txt = lic_item .get ("matched_text" , "" )
148+ matched_txt = lic_item .get ("matched_text" , "" ). lower ()
145149 matched = regex .search (matched_txt )
146150 if matched :
147151 license_value = str (matched .group ())
@@ -154,7 +158,7 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_
154158 license_detected .append (license_value )
155159
156160 # Add matched licenses
157- if need_matched_license and "category" in lic_item :
161+ if "category" in lic_item :
158162 lic_category = lic_item ["category" ]
159163 if "matched_text" in lic_item :
160164 lic_matched_text = lic_item ["matched_text" ]
@@ -184,3 +188,125 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_
184188 rc = False
185189 msg = list (set (msg ))
186190 return rc , scancode_file_item , msg , license_list
191+
192+
def split_spdx_expression(spdx_string):
    """Split a license expression into its individual license keys.

    Grouping characters listed in ``SPDX_REPLACE_WORDS`` are removed, then the
    expression is split on the ``KEY_AND`` / ``KEY_OR`` operators.

    The operators are only recognised as standalone, whitespace-delimited
    words. A plain substring replace/split would also cut license keys that
    merely contain the letters "or" / "and" (for example "bsd-original" or
    "android-sdk") into meaningless fragments.

    Args:
        spdx_string: License expression such as "(mit or apache-2.0) and bsd-new".

    Returns:
        A list of stripped, non-empty license keys in order of appearance.
        An empty or ``None`` expression yields an empty list.
    """
    if not spdx_string:
        return []

    for token in SPDX_REPLACE_WORDS:
        spdx_string = spdx_string.replace(token, "")

    # Require whitespace on both sides so only real operators act as separators.
    operators = "|".join(re.escape(keyword) for keyword in (KEY_AND, KEY_OR))
    pieces = re.split(r"\s+(?:" + operators + r")\s+", spdx_string, flags=re.IGNORECASE)
    return [piece.strip() for piece in pieces if piece.strip()]
200+
201+
def _read_download_locations(target_path):
    """Return the SPDX-PackageDownloadLocation values found in a file.

    The file is memory-mapped read-only and searched with the module-level
    ``find_word`` bytes pattern. Bytes that are not valid UTF-8 are replaced
    instead of raising, so a single odd byte cannot discard the result.

    Raises:
        OSError: The file cannot be opened or mapped.
        ValueError: The file is empty (mmap rejects a zero-length mapping).
    """
    with open(target_path, "rb") as f:
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
            return [word.decode("utf-8", errors="replace") for word in find_word.findall(mmap_obj)]


def parsing_scancode_32_later(scancode_file_list, path_to_scan, has_error=False):
    """Parse file entries that carry a "license_detections" list.

    Args:
        scancode_file_list: Iterable of per-file dicts from the scan result.
        path_to_scan: Directory joined with each entry's "path" when the file
            itself has to be read for download locations.
        has_error: When true, entries with "scan_errors" are reported with the
            errors as their comment and are not parsed any further.

    Returns:
        A tuple ``(rc, scancode_file_item, msg, license_list)``:
        ``rc`` is False if any entry raised while being parsed,
        ``scancode_file_item`` is the list of ScanItem objects built,
        ``msg`` is the list of distinct error messages, and
        ``license_list`` maps "<license><matched_text>" to MatchedLicense.
    """
    rc = True
    msg = []
    scancode_file_item = []
    license_list = {}  # Key :[license]+[matched_text], value: MatchedLicense()

    for file_entry in scancode_file_list or []:
        try:
            file_path = file_entry.get("path", "")
            is_binary = file_entry.get("is_binary", False)
            is_dir = file_entry.get("type", "") == "directory"
            if (not file_path) or is_binary or is_dir:
                continue

            result_item = ScanItem(file_path)

            if has_error:
                error_msg = file_entry.get("scan_errors", [])
                if error_msg:
                    result_item.comment = ",".join(error_msg)
                    scancode_file_item.append(result_item)
                    continue

            url_list = []
            if file_entry.get("urls", []):
                try:
                    url_list = _read_download_locations(os.path.join(path_to_scan, file_path))
                except (OSError, ValueError) as ex:
                    # Report the failure, but keep parsing: an unreadable or
                    # empty file must not discard the licenses and copyrights
                    # that were already detected for it.
                    msg.append(f"Error Parsing item: {ex}")
                    rc = False
            result_item.download_location = url_list

            copyright_value_list = []
            for x in file_entry.get("copyrights", []):
                copyright_data = x.get("copyright", "")
                if copyright_data:
                    try:
                        copyright_data = re.sub(KEYWORD_SPDX_ID, '', copyright_data, flags=re.I)
                        copyright_data = re.sub(KEYWORD_DOWNLOAD_LOC, '', copyright_data, flags=re.I).strip()
                    except TypeError:
                        # Best effort: keep a non-string value untouched.
                        pass
                    copyright_value_list.append(copyright_data)
            result_item.copyright = copyright_value_list

            license_detected = []
            licenses = file_entry.get("license_detections", [])
            if not licenses:
                # Entries without any license detection are not reported.
                continue
            for lic in licenses:
                for matched_lic in lic.get("matches", []):
                    found_lic_list = matched_lic.get("license_expression", "")
                    matched_txt = matched_lic.get("matched_text", "")
                    if not found_lic_list:
                        continue
                    for found_lic in split_spdx_expression(found_lic_list.lower()):
                        if not found_lic:
                            continue
                        found_lic = found_lic.strip()
                        if found_lic in REMOVE_LICENSE:
                            continue
                        if found_lic == KEYWORD_SCANCODE_UNKNOWN:
                            # Recover the real "licenseref-..." name from the
                            # matched text when the scanner only reported the
                            # generic unknown key.
                            try:
                                matched = regex.search(matched_txt.lower())
                                if matched:
                                    found_lic = str(matched.group())
                            except (AttributeError, TypeError):
                                pass
                        for word in replace_word:
                            found_lic = found_lic.replace(word, "")
                        if matched_txt:
                            lic_matched_key = found_lic + matched_txt
                            if lic_matched_key in license_list:
                                license_list[lic_matched_key].set_files(file_path)
                            else:
                                lic_info = MatchedLicense(found_lic, "", matched_txt, file_path)
                                license_list[lic_matched_key] = lic_info
                        license_detected.append(found_lic)
            result_item.licenses = license_detected
            if len(license_detected) > 1:
                # With several licenses, keep the full expression as a comment,
                # preferring the SPDX form when it is available.
                license_expression_spdx = file_entry.get("detected_license_expression_spdx", "")
                license_expression = file_entry.get("detected_license_expression", "")
                if license_expression_spdx:
                    license_expression = license_expression_spdx
                if license_expression:
                    result_item.comment = license_expression

            result_item.exclude = is_exclude_file(file_path)
            # The key may be present with a null value; treat that as 0.
            result_item.is_license_text = (file_entry.get("percentage_of_license_text") or 0) > 90
            scancode_file_item.append(result_item)
        except Exception as ex:
            msg.append(f"Error Parsing item: {ex}")
            rc = False

    # Report each distinct message once, keeping first-seen order.
    msg = list(dict.fromkeys(msg))
    return rc, scancode_file_item, msg, license_list
296+
297+
def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_license=False):
    """Parse scan results, choosing the parser that fits the result layout.

    The layout is detected from the first entry: entries that carry a
    "licenses" key go to ``parsing_scancode_32_earlier``, all others go to
    ``parsing_scancode_32_later``.

    Args:
        scancode_file_list: List of per-file dicts from the scan result.
        has_error: Forwarded to the parser; marks that scan errors were reported.
        path_to_scan: Directory that was scanned, forwarded to the parser.
        need_matched_license: When false, the matched-license mapping is
            dropped and an empty dict is returned in its place.

    Returns:
        A tuple ``(rc, scancode_file_item, msg, license_list)`` as produced by
        the selected parser. An empty or ``None`` input yields
        ``(True, [], [], {})``.
    """
    first_item = next(iter(scancode_file_list or []), None)
    if first_item is None:
        # Nothing to parse; the membership test below would raise on None.
        return True, [], [], {}

    if "licenses" in first_item:
        parser = parsing_scancode_32_earlier
    else:
        parser = parsing_scancode_32_later
    rc, scancode_file_item, msg, license_list = parser(scancode_file_list, path_to_scan, has_error)

    if not need_matched_license:
        license_list = {}
    return rc, scancode_file_item, msg, license_list
0 commit comments