Skip to content

Commit df2e604

Browse files
Scancode result cleanup: SPDX-license-identifier
Signed-off-by: Wonjae Park <[email protected]>
1 parent 2d80c3b commit df2e604

File tree

1 file changed

+9
-5
lines changed

1 file changed

+9
-5
lines changed

src/fosslight_source/_parsing_scancode_file_item.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,11 +21,12 @@
2121
os.path.sep for dir_name in _exclude_directory]
2222
_exclude_directory.append("/.")
2323
REMOVE_LICENSE = ["warranty-disclaimer"]
24-
regex = re.compile(r'licenseref-(\S+)', re.IGNORECASE)
24+
regex = re.compile(r'(?:licenseref-|SPDX-license-identifier-)([^",\s]+)', re.IGNORECASE)
2525
find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
2626
KEYWORD_SPDX_ID = r'SPDX-License-Identifier\s*[\S]+'
2727
KEYWORD_DOWNLOAD_LOC = r'DownloadLocation\s*[\S]+'
2828
KEYWORD_SCANCODE_UNKNOWN = "unknown-spdx"
29+
KEYWORD_SCANCODE_PROPRIETARY_LICENSE = "proprietary-license"
2930
SPDX_REPLACE_WORDS = ["(", ")"]
3031
KEY_AND = r"(?<=\s)and(?=\s)"
3132
KEY_OR = r"(?<=\s)or(?=\s)"
@@ -132,12 +133,12 @@ def parsing_scancode_32_earlier(scancode_file_list: list, has_error: bool = Fals
132133
license_value = spdx.lower()
133134

134135
if license_value != "":
135-
if key == KEYWORD_SCANCODE_UNKNOWN:
136+
if key == KEYWORD_SCANCODE_UNKNOWN or key == KEYWORD_SCANCODE_PROPRIETARY_LICENSE:
136137
try:
137138
matched_txt = lic_item.get("matched_text", "").lower()
138139
matched = regex.search(matched_txt)
139140
if matched:
140-
license_value = str(matched.group())
141+
license_value = str(matched.group(1))
141142
except Exception:
142143
pass
143144

@@ -229,23 +230,26 @@ def parsing_scancode_32_later(
229230
licenses = file.get("license_detections", [])
230231
if not licenses:
231232
continue
233+
print("file path:", file.get('path', ''))
232234
for lic in licenses:
233235
matched_lic_list = lic.get("matches", [])
234236
for matched_lic in matched_lic_list:
235237
found_lic_list = matched_lic.get("license_expression", "")
238+
print("found_lic_list:", found_lic_list)
236239
matched_txt = matched_lic.get("matched_text", "")
240+
print("matched_txt:", matched_txt)
237241
if found_lic_list:
238242
found_lic_list = found_lic_list.lower()
239243
for found_lic in split_spdx_expression(found_lic_list):
240244
if found_lic:
241245
found_lic = found_lic.strip()
242246
if found_lic in REMOVE_LICENSE:
243247
continue
244-
elif found_lic == KEYWORD_SCANCODE_UNKNOWN:
248+
elif found_lic == KEYWORD_SCANCODE_UNKNOWN or found_lic == KEYWORD_SCANCODE_PROPRIETARY_LICENSE:
245249
try:
246250
matched = regex.search(matched_txt.lower())
247251
if matched:
248-
found_lic = str(matched.group())
252+
found_lic = str(matched.group(1))
249253
except Exception:
250254
pass
251255
for word in replace_word:

0 commit comments

Comments
 (0)