Skip to content

Commit 88a323a

Browse files
committed
Add an exception handler for parsing
1 parent: 15d2f78 · commit: 88a323a

File tree

1 file changed

+83
-79
lines changed

1 file changed

+83
-79
lines changed

src/fosslight_source/_parsing_scancode_file_item.py

Lines changed: 83 additions & 79 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
KEY_AND = "and"
3030
KEY_OR = "or"
3131

32+
3233
def get_error_from_header(header_item):
3334
has_error = False
3435
str_error = ""
@@ -206,86 +207,90 @@ def parsing_scancode_32_later(scancode_file_list, path_to_scan, has_error=False)
206207

207208
if scancode_file_list:
208209
for file in scancode_file_list:
209-
file_path = file.get("path", "")
210-
is_binary = file.get("is_binary", False)
211-
is_dir = file.get("type", "") == "directory"
212-
if (not file_path) or is_binary or is_dir:
213-
continue
214-
215-
result_item = ScanItem(file_path)
216-
217-
if has_error:
218-
error_msg = file.get("scan_errors", [])
219-
if error_msg:
220-
result_item.comment = ",".join(error_msg)
221-
scancode_file_item.append(result_item)
210+
try:
211+
file_path = file.get("path", "")
212+
is_binary = file.get("is_binary", False)
213+
is_dir = file.get("type", "") == "directory"
214+
if (not file_path) or is_binary or is_dir:
222215
continue
223216

224-
url_list = []
225-
if file.get("urls", []):
226-
with open(os.path.join(path_to_scan, file_path), "r") as f:
227-
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
228-
for word in find_word.findall(mmap_obj):
229-
url_list.append(word.decode('utf-8'))
230-
result_item.download_location = url_list
231-
232-
copyright_value_list = []
233-
for x in file.get("copyrights", []):
234-
copyright_data = x.get("copyright", "")
235-
if copyright_data:
236-
try:
237-
copyright_data = re.sub(KEYWORD_SPDX_ID, '', copyright_data, flags=re.I)
238-
copyright_data = re.sub(KEYWORD_DOWNLOAD_LOC, '', copyright_data, flags=re.I).strip()
239-
except Exception:
240-
pass
241-
copyright_value_list.append(copyright_data)
242-
result_item.copyright = copyright_value_list
243-
244-
license_detected = []
245-
licenses = file.get("license_detections", [])
246-
if not licenses:
247-
continue
248-
for lic in licenses:
249-
matched_lic_list = lic.get("matches", [])
250-
for matched_lic in matched_lic_list:
251-
found_lic_list = matched_lic.get("license_expression", "")
252-
matched_txt = matched_lic.get("matched_text", "")
253-
if found_lic_list:
254-
found_lic_list = found_lic_list.lower()
255-
for found_lic in split_spdx_expression(found_lic_list):
256-
if found_lic:
257-
found_lic = found_lic.strip()
258-
if found_lic in REMOVE_LICENSE:
259-
continue
260-
elif found_lic == KEYWORD_SCANCODE_UNKNOWN:
261-
try:
262-
matched = regex.search(matched_txt.lower())
263-
if matched:
264-
found_lic = str(matched.group())
265-
except Exception:
266-
pass
267-
for word in replace_word:
268-
found_lic = found_lic.replace(word, "")
269-
if matched_txt:
270-
lic_matched_key = found_lic + matched_txt
271-
if lic_matched_key in license_list:
272-
license_list[lic_matched_key].set_files(file_path)
273-
else:
274-
lic_info = MatchedLicense(found_lic, "", matched_txt, file_path)
275-
license_list[lic_matched_key] = lic_info
276-
license_detected.append(found_lic)
277-
result_item.licenses = license_detected
278-
if len(license_detected) > 1:
279-
license_expression_spdx = file.get("detected_license_expression_spdx", "")
280-
license_expression = file.get("detected_license_expression", "")
281-
if license_expression_spdx:
282-
license_expression = license_expression_spdx
283-
if license_expression:
284-
result_item.comment = license_expression
285-
286-
result_item.exclude = is_exclude_file(file_path)
287-
result_item.is_license_text = file.get("percentage_of_license_text", 0) > 90
288-
scancode_file_item.append(result_item)
217+
result_item = ScanItem(file_path)
218+
219+
if has_error:
220+
error_msg = file.get("scan_errors", [])
221+
if error_msg:
222+
result_item.comment = ",".join(error_msg)
223+
scancode_file_item.append(result_item)
224+
continue
225+
226+
url_list = []
227+
if file.get("urls", []):
228+
with open(os.path.join(path_to_scan, file_path), "r") as f:
229+
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
230+
for word in find_word.findall(mmap_obj):
231+
url_list.append(word.decode('utf-8'))
232+
result_item.download_location = url_list
233+
234+
copyright_value_list = []
235+
for x in file.get("copyrights", []):
236+
copyright_data = x.get("copyright", "")
237+
if copyright_data:
238+
try:
239+
copyright_data = re.sub(KEYWORD_SPDX_ID, '', copyright_data, flags=re.I)
240+
copyright_data = re.sub(KEYWORD_DOWNLOAD_LOC, '', copyright_data, flags=re.I).strip()
241+
except Exception:
242+
pass
243+
copyright_value_list.append(copyright_data)
244+
result_item.copyright = copyright_value_list
245+
246+
license_detected = []
247+
licenses = file.get("license_detections", [])
248+
if not licenses:
249+
continue
250+
for lic in licenses:
251+
matched_lic_list = lic.get("matches", [])
252+
for matched_lic in matched_lic_list:
253+
found_lic_list = matched_lic.get("license_expression", "")
254+
matched_txt = matched_lic.get("matched_text", "")
255+
if found_lic_list:
256+
found_lic_list = found_lic_list.lower()
257+
for found_lic in split_spdx_expression(found_lic_list):
258+
if found_lic:
259+
found_lic = found_lic.strip()
260+
if found_lic in REMOVE_LICENSE:
261+
continue
262+
elif found_lic == KEYWORD_SCANCODE_UNKNOWN:
263+
try:
264+
matched = regex.search(matched_txt.lower())
265+
if matched:
266+
found_lic = str(matched.group())
267+
except Exception:
268+
pass
269+
for word in replace_word:
270+
found_lic = found_lic.replace(word, "")
271+
if matched_txt:
272+
lic_matched_key = found_lic + matched_txt
273+
if lic_matched_key in license_list:
274+
license_list[lic_matched_key].set_files(file_path)
275+
else:
276+
lic_info = MatchedLicense(found_lic, "", matched_txt, file_path)
277+
license_list[lic_matched_key] = lic_info
278+
license_detected.append(found_lic)
279+
result_item.licenses = license_detected
280+
if len(license_detected) > 1:
281+
license_expression_spdx = file.get("detected_license_expression_spdx", "")
282+
license_expression = file.get("detected_license_expression", "")
283+
if license_expression_spdx:
284+
license_expression = license_expression_spdx
285+
if license_expression:
286+
result_item.comment = license_expression
287+
288+
result_item.exclude = is_exclude_file(file_path)
289+
result_item.is_license_text = file.get("percentage_of_license_text", 0) > 90
290+
scancode_file_item.append(result_item)
291+
except Exception as ex:
292+
msg.append(f"Error Parsing item: {ex}")
293+
rc = False
289294

290295
return rc, scancode_file_item, msg, license_list
291296

@@ -305,4 +310,3 @@ def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_
305310
if not need_matched_license:
306311
license_list = {}
307312
return rc, scancode_file_item, msg, license_list
308-

0 commit comments

Comments
 (0)