Skip to content

Commit 16ec508

Browse files
Read download location (#124)
1 parent 176c76f commit 16ec508

File tree

9 files changed: 239 additions (+239) and 36 deletions (-36).

src/fosslight_source/_parsing_scancode_file_item.py

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import logging
88
import re
99
import fosslight_util.constant as constant
10+
import mmap
1011
from ._license_matched import MatchedLicense
1112
from ._scan_item import ScanItem
1213
from ._scan_item import is_exclude_dir
@@ -40,7 +41,7 @@ def get_error_from_header(header_item):
4041
return has_error, str_error
4142

4243

43-
def parsing_file_item(scancode_file_list, has_error, need_matched_license=False):
44+
def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_license=False):
4445

4546
rc = True
4647
scancode_file_item = []
@@ -50,6 +51,7 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False)
5051
prev_dir = ""
5152
prev_dir_value = False
5253
regex = re.compile(r'licenseref-(\S+)', re.IGNORECASE)
54+
find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
5355

5456
if scancode_file_list:
5557
for file in scancode_file_list:
@@ -71,6 +73,18 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False)
7173

7274
result_item = ScanItem(file_path)
7375

76+
fullpath = os.path.join(path_to_scan, file_path)
77+
78+
urls = file.get("urls", [])
79+
url_list = []
80+
81+
if urls:
82+
with open(fullpath, "r") as f:
83+
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
84+
for word in find_word.findall(mmap_obj):
85+
url_list.append(word.decode('utf-8'))
86+
result_item.download_location = url_list
87+
7488
if has_error and "scan_errors" in file:
7589
error_msg = file.get("scan_errors", [])
7690
if len(error_msg) > 0:
@@ -165,7 +179,6 @@ def parsing_file_item(scancode_file_list, has_error, need_matched_license=False)
165179
if is_exclude_file(file_path, prev_dir, prev_dir_value):
166180
result_item.exclude = True
167181
scancode_file_item.append(result_item)
168-
169182
except Exception as ex:
170183
msg.append(f"Error Parsing item: {ex}")
171184
rc = False

src/fosslight_source/_parsing_scanoss_file.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ def parsing_scanResult(scanoss_report):
4848
if 'version' in findings[0]:
4949
result_item.oss_version = findings[0]['version']
5050
if 'url' in findings[0]:
51-
result_item.download_location = findings[0]['url']
51+
result_item.download_location = list([findings[0]['url']])
5252

5353
license_detected = []
5454
license_w_source = {"component_declared": [], "file_spdx_tag": [],

src/fosslight_source/_scan_item.py

Lines changed: 32 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ class ScanItem:
2727
is_license_text = False
2828
oss_name = ""
2929
oss_version = ""
30-
download_location = ""
30+
download_location = []
3131
matched_lines = "" # Only for SCANOSS results
3232
fileURL = "" # Only for SCANOSS results
3333
license_reference = ""
@@ -36,6 +36,7 @@ def __init__(self, value):
3636
self.file = value
3737
self._copyright = []
3838
self._licenses = []
39+
self.download_location = []
3940
self.comment = ""
4041
self.exclude = False
4142
self.is_license_text = False
@@ -63,23 +64,42 @@ def licenses(self, value):
6364
if len(self._licenses) > 0:
6465
self._licenses = list(set(self._licenses))
6566

67+
def get_file(self):
68+
return self.file
69+
6670
def get_row_to_print(self):
67-
print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), self.download_location, "",
68-
','.join(self.copyright),
69-
"Exclude" if self.exclude else "",
70-
self.comment]
71+
print_rows = []
72+
if not self.download_location:
73+
print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses),
74+
"", "", ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment])
75+
else:
76+
for url in self.download_location:
77+
print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses),
78+
url, "", ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment])
7179
return print_rows
7280

7381
def get_row_to_print_for_scanoss(self):
74-
print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), self.download_location, "",
75-
','.join(self.copyright),
76-
"Exclude" if self.exclude else "", self.comment]
82+
print_rows = []
83+
if not self.download_location:
84+
print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), "", "",
85+
','.join(self.copyright), "Exclude" if self.exclude else "", self.comment])
86+
else:
87+
for url in self.download_location:
88+
print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), url, "",
89+
','.join(self.copyright), "Exclude" if self.exclude else "", self.comment])
7790
return print_rows
7891

7992
def get_row_to_print_for_all_scanner(self):
80-
print_rows = [self.file, self.oss_name, self.oss_version, ','.join(self.licenses), self.download_location, "",
81-
','.join(self.copyright),
82-
"Exclude" if self.exclude else "", self.comment, self.license_reference]
93+
print_rows = []
94+
if not self.download_location:
95+
print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), "", "",
96+
','.join(self.copyright), "Exclude" if self.exclude else "", self.comment,
97+
self.license_reference])
98+
else:
99+
for url in self.download_location:
100+
print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), url, "",
101+
','.join(self.copyright), "Exclude" if self.exclude else "", self.comment,
102+
self.license_reference])
83103
return print_rows
84104

85105
def merge_scan_item(self, other):
@@ -104,7 +124,7 @@ def merge_scan_item(self, other):
104124
if not self.oss_version:
105125
self.oss_version = other.oss_version
106126
if not self.download_location:
107-
self.download_location = other.download_location
127+
self.download_location = list(other.download_location)
108128
if not self.matched_lines:
109129
self.matched_lines = other.matched_lines
110130
if not self.fileURL:

src/fosslight_source/cli.py

Lines changed: 26 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@ def main():
5252

5353
scanned_result = []
5454
license_list = []
55+
scanoss_result = []
5556
time_out = 120
5657
core = -1
5758

@@ -117,19 +118,16 @@ def main():
117118
success, _result_log["Scan Result"], scanned_result, license_list = run_scan(path_to_scan, output_file_name,
118119
write_json_file, core, True,
119120
print_matched_text, format, True,
120-
time_out, correct_mode,
121-
correct_filepath)
121+
time_out, correct_mode, correct_filepath)
122122
elif selected_scanner == 'scanoss':
123123
scanned_result = run_scanoss_py(path_to_scan, output_file_name, format, True, write_json_file)
124124
elif selected_scanner == 'all' or selected_scanner == '':
125-
success, _result_log["Scan Result"], scanned_result, license_list = run_all_scanners(path_to_scan, output_file_name,
126-
write_json_file, core,
127-
print_matched_text, format, True,
128-
time_out)
125+
success, _result_log["Scan Result"], scanned_result, license_list, scanoss_result = run_all_scanners(
126+
path_to_scan, output_file_name, write_json_file, core, print_matched_text, format, True, time_out)
129127
else:
130128
print_help_msg_source_scanner()
131129
sys.exit(1)
132-
create_report_file(_start_time, scanned_result, license_list, selected_scanner, print_matched_text,
130+
create_report_file(_start_time, scanned_result, license_list, scanoss_result, selected_scanner, print_matched_text,
133131
output_path, output_file, output_extension, correct_mode, correct_filepath, path_to_scan)
134132
try:
135133
logger.info(yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True))
@@ -140,7 +138,7 @@ def main():
140138
sys.exit(1)
141139

142140

143-
def create_report_file(_start_time, scanned_result, license_list, selected_scanner, need_license=False,
141+
def create_report_file(_start_time, scanned_result, license_list, scanoss_result, selected_scanner, need_license=False,
144142
output_path="", output_file="", output_extension="", correct_mode=True, correct_filepath="",
145143
path_to_scan=""):
146144
"""
@@ -173,24 +171,33 @@ def create_report_file(_start_time, scanned_result, license_list, selected_scann
173171
scanned_result = sorted(scanned_result, key=lambda row: (''.join(row.licenses)))
174172

175173
if selected_scanner == 'scancode' or output_extension == _json_ext:
176-
sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print() for scan_item in scanned_result]
174+
sheet_list[SCANOSS_SHEET_NAME] = []
175+
for scan_item in scanned_result:
176+
for row in scan_item.get_row_to_print():
177+
sheet_list[SCANOSS_SHEET_NAME].append(row)
177178

178179
elif selected_scanner == 'scanoss':
179-
sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print_for_scanoss() for scan_item in scanned_result]
180+
sheet_list[SCANOSS_SHEET_NAME] = []
181+
for scan_item in scanned_result:
182+
for row in scan_item.get_row_to_print_for_scanoss():
183+
sheet_list[SCANOSS_SHEET_NAME].append(row)
180184
extended_header = SCANOSS_HEADER
181185

182186
else:
183-
sheet_list[SCANOSS_SHEET_NAME] = [scan_item.get_row_to_print_for_all_scanner() for scan_item in scanned_result]
187+
sheet_list[SCANOSS_SHEET_NAME] = []
188+
for scan_item in scanned_result:
189+
for row in scan_item.get_row_to_print_for_all_scanner():
190+
sheet_list[SCANOSS_SHEET_NAME].append(row)
184191
extended_header = MERGED_HEADER
185192

186193
if need_license:
187194
if selected_scanner == 'scancode' or output_extension == _json_ext:
188195
sheet_list["scancode_reference"] = get_license_list_to_print(license_list)
189196
elif selected_scanner == 'scanoss':
190-
sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanned_result)
197+
sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result)
191198
else:
192199
sheet_list["scancode_reference"] = get_license_list_to_print(license_list)
193-
sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanned_result)
200+
sheet_list["scanoss_reference"] = get_scanoss_extra_info(scanoss_result)
194201

195202
if correct_mode:
196203
success, msg_correct, correct_list = correct_with_yaml(correct_filepath, path_to_scan, sheet_list)
@@ -242,16 +249,17 @@ def run_all_scanners(path_to_scan, output_file_name="", _write_json_file=False,
242249
False, "")
243250
scanoss_result = run_scanoss_py(path_to_scan, output_file_name, format, called_by_cli, _write_json_file)
244251

252+
scanoss_result_for_merging = copy.deepcopy(scanoss_result)
245253
for file_in_scancode_result in scancode_result:
246254
per_file_result = copy.deepcopy(file_in_scancode_result)
247-
if per_file_result in scanoss_result:
248-
per_file_result.merge_scan_item(scanoss_result.pop(scanoss_result.index(file_in_scancode_result)))
255+
if per_file_result in scanoss_result_for_merging: # Remove SCANOSS result if Scancode result exist
256+
scanoss_result_for_merging.pop(scanoss_result_for_merging.index(file_in_scancode_result))
249257
merged_result.append(per_file_result)
250-
if scanoss_result:
251-
for file_left_in_scanoss_result in scanoss_result:
258+
if scanoss_result_for_merging:
259+
for file_left_in_scanoss_result in scanoss_result_for_merging:
252260
merged_result.append(file_left_in_scanoss_result)
253261

254-
return success, _result_log["Scan Result"], merged_result, license_list
262+
return success, _result_log["Scan Result"], merged_result, license_list, scanoss_result
255263

256264

257265
if __name__ == '__main__':

src/fosslight_source/run_scancode.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def run_scan(path_to_scan, output_file_name="",
7575
processes=num_cores,
7676
output_json_pp=output_json_file,
7777
only_findings=True, license_text=True,
78-
timeout=time_out)
78+
url=True, timeout=time_out)
7979

8080
if not rc:
8181
msg = "Source code analysis failed."
@@ -90,7 +90,8 @@ def run_scan(path_to_scan, output_file_name="",
9090
_result_log["Error_files"] = error_msg
9191
msg = "Failed to analyze :" + error_msg
9292
if "files" in results:
93-
rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"], has_error, need_license)
93+
rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"],
94+
has_error, path_to_scan, need_license)
9495
if parsing_msg:
9596
_result_log["Parsing Log"] = parsing_msg
9697
if rc:

src/fosslight_source/run_scanoss.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ def run_scanoss_py(path_to_scan, output_file_name="", format="", called_by_cli=F
6868
if num_threads > 0:
6969
scan_command += " -T " + str(num_threads)
7070
else:
71-
scan_command += " -T " + "30"
71+
scan_command += " -T " + "10"
7272

7373
try:
7474
os.system(scan_command)

tests/test_files/run_scancode.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,9 @@
33
# Copyright (c) 2020 LG Electronics Inc.
44
# SPDX-License-Identifier: Apache-2.0
55

6+
# SPDX-PackageDownloadLocation: https://dummy_url_for_test.com
7+
# The code is not licensed under GPL-2.0.
8+
69
import sys
710
import os
811
import multiprocessing

0 commit comments

Comments
 (0)