Skip to content

Commit 1a446a5

Browse files
Priority change between Download Location extraction and scanner operation (#133)
1 parent ef52577 commit 1a446a5

File tree

6 files changed

+128
-159
lines changed

6 files changed

+128
-159
lines changed

src/fosslight_source/_parsing_scancode_file_item.py

Lines changed: 5 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import logging
88
import re
99
import fosslight_util.constant as constant
10-
import mmap
1110
from ._license_matched import MatchedLicense
1211
from ._scan_item import ScanItem
1312
from ._scan_item import is_exclude_dir
@@ -49,7 +48,7 @@ def get_error_from_header(header_item):
4948
return has_error, str_error
5049

5150

52-
def parsing_scancode_32_earlier(scancode_file_list, path_to_scan, has_error=False):
51+
def parsing_scancode_32_earlier(scancode_file_list, has_error=False):
5352
rc = True
5453
msg = []
5554
scancode_file_item = []
@@ -77,18 +76,6 @@ def parsing_scancode_32_earlier(scancode_file_list, path_to_scan, has_error=Fals
7776

7877
result_item = ScanItem(file_path)
7978

80-
fullpath = os.path.join(path_to_scan, file_path)
81-
82-
urls = file.get("urls", [])
83-
url_list = []
84-
85-
if urls:
86-
with open(fullpath, "r") as f:
87-
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
88-
for word in find_word.findall(mmap_obj):
89-
url_list.append(word.decode('utf-8'))
90-
result_item.download_location = url_list
91-
9279
if has_error and "scan_errors" in file:
9380
error_msg = file.get("scan_errors", [])
9481
if len(error_msg) > 0:
@@ -199,7 +186,7 @@ def split_spdx_expression(spdx_string):
199186
return license
200187

201188

202-
def parsing_scancode_32_later(scancode_file_list, path_to_scan, has_error=False):
189+
def parsing_scancode_32_later(scancode_file_list, has_error=False):
203190
rc = True
204191
msg = []
205192
scancode_file_item = []
@@ -223,14 +210,6 @@ def parsing_scancode_32_later(scancode_file_list, path_to_scan, has_error=False)
223210
scancode_file_item.append(result_item)
224211
continue
225212

226-
url_list = []
227-
if file.get("urls", []):
228-
with open(os.path.join(path_to_scan, file_path), "r") as f:
229-
with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
230-
for word in find_word.findall(mmap_obj):
231-
url_list.append(word.decode('utf-8'))
232-
result_item.download_location = url_list
233-
234213
copyright_value_list = []
235214
for x in file.get("copyrights", []):
236215
copyright_data = x.get("copyright", "")
@@ -295,18 +274,16 @@ def parsing_scancode_32_later(scancode_file_list, path_to_scan, has_error=False)
295274
return rc, scancode_file_item, msg, license_list
296275

297276

298-
def parsing_file_item(scancode_file_list, has_error, path_to_scan, need_matched_license=False):
277+
def parsing_file_item(scancode_file_list, has_error, need_matched_license=False):
299278

300279
rc = True
301280
msg = []
302281

303282
first_item = next(iter(scancode_file_list or []), {})
304283
if "licenses" in first_item:
305-
rc, scancode_file_item, msg, license_list = parsing_scancode_32_earlier(scancode_file_list,
306-
path_to_scan, has_error)
284+
rc, scancode_file_item, msg, license_list = parsing_scancode_32_earlier(scancode_file_list, has_error)
307285
else:
308-
rc, scancode_file_item, msg, license_list = parsing_scancode_32_later(scancode_file_list,
309-
path_to_scan, has_error)
286+
rc, scancode_file_item, msg, license_list = parsing_scancode_32_later(scancode_file_list, has_error)
310287
if not need_matched_license:
311288
license_list = {}
312289
return rc, scancode_file_item, msg, license_list

src/fosslight_source/_scan_item.py

Lines changed: 7 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ def __init__(self, value):
4444
def __del__(self):
4545
pass
4646

47+
def __hash__(self):
48+
return hash(self.file)
49+
4750
@property
4851
def copyright(self):
4952
return self._copyright
@@ -68,28 +71,6 @@ def get_file(self):
6871
return self.file
6972

7073
def get_row_to_print(self):
71-
print_rows = []
72-
if not self.download_location:
73-
print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses),
74-
"", "", ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment])
75-
else:
76-
for url in self.download_location:
77-
print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses),
78-
url, "", ','.join(self.copyright), "Exclude" if self.exclude else "", self.comment])
79-
return print_rows
80-
81-
def get_row_to_print_for_scanoss(self):
82-
print_rows = []
83-
if not self.download_location:
84-
print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), "", "",
85-
','.join(self.copyright), "Exclude" if self.exclude else "", self.comment])
86-
else:
87-
for url in self.download_location:
88-
print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), url, "",
89-
','.join(self.copyright), "Exclude" if self.exclude else "", self.comment])
90-
return print_rows
91-
92-
def get_row_to_print_for_all_scanner(self):
9374
print_rows = []
9475
if not self.download_location:
9576
print_rows.append([self.file, self.oss_name, self.oss_version, ','.join(self.licenses), "", "",
@@ -102,38 +83,11 @@ def get_row_to_print_for_all_scanner(self):
10283
self.license_reference])
10384
return print_rows
10485

105-
def merge_scan_item(self, other):
106-
"""
107-
Merge two ScanItem instance into one.
108-
"""
109-
if sorted(self.licenses) != sorted(other.licenses):
110-
self.license_reference = f"(Scancode) {', '.join(self.licenses)} / (Scanoss) {', '.join(other.licenses)}"
111-
112-
self.licenses = list(set(self.licenses + other.licenses))
113-
114-
if len(self.copyright) > 0:
115-
self.copyright = list(set(self.copyright))
116-
117-
if self.exclude and other.exclude:
118-
self.exclude = True
119-
else:
120-
self.exclude = False
121-
122-
if not self.oss_name:
123-
self.oss_name = other.oss_name
124-
if not self.oss_version:
125-
self.oss_version = other.oss_version
126-
if not self.download_location:
127-
self.download_location = list(other.download_location)
128-
if not self.matched_lines:
129-
self.matched_lines = other.matched_lines
130-
if not self.fileURL:
131-
self.fileURL = other.fileURL
132-
if not self.scanoss_reference:
133-
self.scanoss_reference = other.scanoss_reference
134-
13586
def __eq__(self, other):
136-
return self.file == other.file
87+
if type(other) == str:
88+
return self.file == other
89+
else:
90+
return self.file == other.file
13791

13892

13993
def is_exclude_dir(dir_path):

src/fosslight_source/cli.py

Lines changed: 64 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import os
88
import warnings
99
import logging
10-
import copy
1110
from datetime import datetime
1211
import fosslight_util.constant as constant
1312
from fosslight_util.set_log import init_log
@@ -21,16 +20,17 @@
2120
from .run_scanoss import get_scanoss_extra_info
2221
import yaml
2322
import argparse
24-
25-
SCANOSS_SHEET_NAME = 'SRC_FL_Source'
26-
SCANOSS_HEADER = {SCANOSS_SHEET_NAME: ['ID', 'Source Name or Path', 'OSS Name',
27-
'OSS Version', 'License', 'Download Location',
28-
'Homepage', 'Copyright Text', 'Exclude',
29-
'Comment']}
30-
MERGED_HEADER = {SCANOSS_SHEET_NAME: ['ID', 'Source Name or Path', 'OSS Name',
31-
'OSS Version', 'License', 'Download Location',
32-
'Homepage', 'Copyright Text', 'Exclude',
33-
'Comment', 'license_reference']}
23+
from .run_spdx_extractor import get_spdx_downloads
24+
from ._scan_item import ScanItem
25+
26+
SRC_SHEET_NAME = 'SRC_FL_Source'
27+
SCANOSS_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Name or Path', 'OSS Name',
28+
'OSS Version', 'License', 'Download Location',
29+
'Homepage', 'Copyright Text', 'Exclude', 'Comment']}
30+
MERGED_HEADER = {SRC_SHEET_NAME: ['ID', 'Source Name or Path', 'OSS Name',
31+
'OSS Version', 'License', 'Download Location',
32+
'Homepage', 'Copyright Text', 'Exclude', 'Comment', 'license_reference']}
33+
SCANNER_TYPE = ['scancode', 'scanoss', 'all', '']
3434

3535
logger = logging.getLogger(constant.LOGGER_NAME)
3636
warnings.filterwarnings("ignore", category=FutureWarning)
@@ -50,7 +50,6 @@ def main():
5050
selected_scanner = ""
5151
correct_mode = True
5252

53-
scanned_result = []
5453
license_list = []
5554
scanoss_result = []
5655
time_out = 120
@@ -114,21 +113,28 @@ def main():
114113
True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan)
115114

116115
if os.path.isdir(path_to_scan):
117-
if selected_scanner == 'scancode':
118-
success, _result_log["Scan Result"], scanned_result, license_list = run_scan(path_to_scan, output_file_name,
119-
write_json_file, core, True,
120-
print_matched_text, format, True,
121-
time_out, correct_mode, correct_filepath)
122-
elif selected_scanner == 'scanoss':
123-
scanned_result = run_scanoss_py(path_to_scan, output_file_name, format, True, write_json_file)
124-
elif selected_scanner == 'all' or selected_scanner == '':
125-
success, _result_log["Scan Result"], scanned_result, license_list, scanoss_result = run_all_scanners(
126-
path_to_scan, output_file_name, write_json_file, core, print_matched_text, format, True, time_out)
127-
else:
116+
scancode_result = []
117+
scanoss_result = []
118+
merged_result = []
119+
spdx_downloads = {}
120+
success = True
121+
122+
if selected_scanner == 'scancode' or selected_scanner == 'all' or selected_scanner == '':
123+
success, _result_log["Scan Result"], scancode_result, license_list = run_scan(path_to_scan, output_file_name,
124+
write_json_file, core, True,
125+
print_matched_text, format, True,
126+
time_out, correct_mode,
127+
correct_filepath)
128+
if selected_scanner == 'scanoss' or selected_scanner == 'all' or selected_scanner == '':
129+
scanoss_result = run_scanoss_py(path_to_scan, output_file_name, format, True, write_json_file)
130+
if selected_scanner not in SCANNER_TYPE:
128131
print_help_msg_source_scanner()
129132
sys.exit(1)
130-
create_report_file(_start_time, scanned_result, license_list, scanoss_result, selected_scanner, print_matched_text,
133+
spdx_downloads = get_spdx_downloads(path_to_scan)
134+
merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads)
135+
create_report_file(_start_time, merged_result, license_list, scanoss_result, selected_scanner, print_matched_text,
131136
output_path, output_file, output_extension, correct_mode, correct_filepath, path_to_scan)
137+
132138
try:
133139
logger.info(yaml.safe_dump(_result_log, allow_unicode=True, sort_keys=True))
134140
except Exception as ex:
@@ -138,7 +144,7 @@ def main():
138144
sys.exit(1)
139145

140146

141-
def create_report_file(_start_time, scanned_result, license_list, scanoss_result, selected_scanner, need_license=False,
147+
def create_report_file(_start_time, merged_result, license_list, scanoss_result, selected_scanner, need_license=False,
142148
output_path="", output_file="", output_extension="", correct_mode=True, correct_filepath="",
143149
path_to_scan=""):
144150
"""
@@ -167,25 +173,25 @@ def create_report_file(_start_time, scanned_result, license_list, scanoss_result
167173
else:
168174
output_file = f"fosslight_report_src_{_start_time}"
169175

170-
if scanned_result:
176+
if merged_result:
171177
if selected_scanner == 'scancode' or output_extension == _json_ext:
172-
sheet_list[SCANOSS_SHEET_NAME] = []
173-
for scan_item in scanned_result:
178+
sheet_list[SRC_SHEET_NAME] = []
179+
for scan_item in merged_result:
174180
for row in scan_item.get_row_to_print():
175-
sheet_list[SCANOSS_SHEET_NAME].append(row)
181+
sheet_list[SRC_SHEET_NAME].append(row)
176182

177183
elif selected_scanner == 'scanoss':
178-
sheet_list[SCANOSS_SHEET_NAME] = []
179-
for scan_item in scanned_result:
180-
for row in scan_item.get_row_to_print_for_scanoss():
181-
sheet_list[SCANOSS_SHEET_NAME].append(row)
184+
sheet_list[SRC_SHEET_NAME] = []
185+
for scan_item in merged_result:
186+
for row in scan_item.get_row_to_print():
187+
sheet_list[SRC_SHEET_NAME].append(row)
182188
extended_header = SCANOSS_HEADER
183189

184190
else:
185-
sheet_list[SCANOSS_SHEET_NAME] = []
186-
for scan_item in scanned_result:
187-
for row in scan_item.get_row_to_print_for_all_scanner():
188-
sheet_list[SCANOSS_SHEET_NAME].append(row)
191+
sheet_list[SRC_SHEET_NAME] = []
192+
for scan_item in merged_result:
193+
for row in scan_item.get_row_to_print():
194+
sheet_list[SRC_SHEET_NAME].append(row)
189195
extended_header = MERGED_HEADER
190196

191197
if need_license:
@@ -217,47 +223,30 @@ def create_report_file(_start_time, scanned_result, license_list, scanoss_result
217223
logger.error(f"Fail to generate result file. msg:({writing_msg})")
218224

219225

220-
def run_all_scanners(path_to_scan, output_file_name="", _write_json_file=False, num_cores=-1,
221-
need_license=False, format="", called_by_cli=True, time_out=120):
226+
def merge_results(scancode_result=[], scanoss_result=[], spdx_downloads={}):
222227
"""
223-
Run Scancode and scanoss.py for the given path.
224-
225-
:param path_to_scan: path of sourcecode to scan.
226-
:param output_file_name: path or file name (with path) for the output.
227-
:param _write_json_file: if requested, keep the raw files.
228-
:param num_cores: number of cores used for scancode scanning.
229-
:param need_license: if requested, output matched text (only for scancode).
230-
:param format: output format (excel, csv, opossum).
231-
:param called_by_cli: if not called by cli, initialize logger.
232-
:return success: success or failure of scancode.
233-
:return _result_log["Scan Result"]:
234-
:return merged_result: merged scan result of scancode and scanoss.
235-
:return license_list: matched text.(only for scancode)
228+
Merge scanner results and spdx parsing result.
229+
:param scancode_result: list of scancode results in ScanItem.
230+
:param scanoss_result: list of scanoss results in ScanItem.
231+
:param spdx_downloads: dictionary of spdx parsed results.
232+
:return merged_result: list of merged result in ScanItem.
236233
"""
237-
scancode_result = []
238-
scanoss_result = []
239-
merged_result = []
240-
_result_log = {}
241-
success = True
242234

243-
success, _result_log["Scan Result"], scancode_result, license_list = run_scan(path_to_scan, output_file_name,
244-
_write_json_file, num_cores,
245-
True, need_license,
246-
format, called_by_cli, time_out,
247-
False, "")
248-
scanoss_result = run_scanoss_py(path_to_scan, output_file_name, format, called_by_cli, _write_json_file)
249-
250-
scanoss_result_for_merging = copy.deepcopy(scanoss_result)
251-
for file_in_scancode_result in scancode_result:
252-
per_file_result = copy.deepcopy(file_in_scancode_result)
253-
if per_file_result in scanoss_result_for_merging: # Remove SCANOSS result if Scancode result exist
254-
scanoss_result_for_merging.pop(scanoss_result_for_merging.index(file_in_scancode_result))
255-
merged_result.append(per_file_result)
256-
if scanoss_result_for_merging:
257-
for file_left_in_scanoss_result in scanoss_result_for_merging:
258-
merged_result.append(file_left_in_scanoss_result)
259-
260-
return success, _result_log["Scan Result"], merged_result, license_list, scanoss_result
235+
# Add any item that was found only by SCANOSS to the result.
236+
scancode_result.extend([item for item in scanoss_result if item not in scancode_result])
237+
238+
# If a download location in SPDX form is found, overwrite the scanner result.
239+
# If scanner result doesn't exist, create a new row.
240+
if spdx_downloads:
241+
for file_name, download_location in spdx_downloads.items():
242+
if file_name in scancode_result:
243+
merged_result_item = scancode_result[scancode_result.index(file_name)]
244+
merged_result_item.download_location = download_location
245+
else:
246+
new_result_item = ScanItem(file_name)
247+
new_result_item.download_location = download_location
248+
scancode_result.append(new_result_item)
249+
return scancode_result
261250

262251

263252
if __name__ == '__main__':

src/fosslight_source/run_scancode.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def run_scan(path_to_scan, output_file_name="",
9191
msg = "Failed to analyze :" + error_msg
9292
if "files" in results:
9393
rc, result_list, parsing_msg, license_list = parsing_file_item(results["files"],
94-
has_error, path_to_scan, need_license)
94+
has_error, need_license)
9595
if parsing_msg:
9696
_result_log["Parsing Log"] = parsing_msg
9797
if rc:

0 commit comments

Comments
 (0)