Skip to content

Commit 4ec250a

Browse files
committed
Update scanner with ossinfo
Signed-off-by: jiyeong.seok <[email protected]>
1 parent c1dca69 commit 4ec250a

File tree

2 files changed

+83
-175
lines changed

2 files changed

+83
-175
lines changed

src/fosslight_scanner/common.py

Lines changed: 44 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,13 @@
99
import shutil
1010
import pandas as pd
1111
import yaml
12-
13-
import fosslight_util.constant as constant
12+
from fosslight_util.constant import LOGGER_NAME, FOSSLIGHT_SOURCE, FOSSLIGHT_BINARY
1413
from fosslight_util.parsing_yaml import parsing_yml
15-
from fosslight_util.write_yaml import create_yaml_with_ossitem
1614
from fosslight_util.write_scancodejson import write_scancodejson
1715
from fosslight_util.read_excel import read_oss_report
18-
from fosslight_util.output_format import write_output_file
1916
from fosslight_util.oss_item import OssItem
2017

21-
logger = logging.getLogger(constant.LOGGER_NAME)
18+
logger = logging.getLogger(LOGGER_NAME)
2219
SRC_SHEET = 'SRC_FL_Source'
2320
BIN_SHEET = 'BIN_FL_Binary'
2421
BIN_EXT_HEADER = {
@@ -93,65 +90,18 @@ def call_analysis_api(path_to_run, str_run_start, return_idx, func, *args, **kwa
9390
return success, result or []
9491

9592

96-
def overwrite_excel(excel_file_path, oss_name, column_name='OSS Name'):
97-
if oss_name:
98-
try:
99-
files = os.listdir(excel_file_path)
100-
for file in files:
101-
if file.endswith(".xlsx"):
102-
file_path = os.path.join(excel_file_path, file)
103-
excel_file = pd.ExcelFile(file_path, engine='openpyxl')
104-
105-
for sheet_name in excel_file.sheet_names:
106-
try:
107-
df = pd.read_excel(file_path, sheet_name=sheet_name, engine='openpyxl')
108-
if column_name in df.columns:
109-
updated = (df[column_name] == '') | (df[column_name].isnull())
110-
df.loc[updated, column_name] = oss_name
111-
df.to_excel(file_path, sheet_name=sheet_name, index=False)
112-
except Exception as ex:
113-
logger.debug(f"overwrite_sheet {sheet_name}:{ex}")
114-
except Exception as ex:
115-
logger.debug(f"overwrite_excel:{ex}")
116-
117-
118-
def merge_yamls(_output_dir, merge_yaml_files, final_report, remove_src_data=False,
119-
default_oss_name='', default_oss_version='', url=''):
120-
success = True
121-
err_msg = ''
122-
123-
oss_total_list = []
124-
yaml_dict = {}
125-
try:
126-
for mf in merge_yaml_files:
127-
if os.path.exists(os.path.join(_output_dir, mf)):
128-
oss_list, _, _ = parsing_yml(os.path.join(_output_dir, mf), _output_dir)
129-
130-
if remove_src_data:
131-
existed_yaml = {}
132-
for oi in oss_list:
133-
oi.name = default_oss_name if oi.name == '' else oi.name
134-
oi.version = default_oss_version if oi.version == '' else oi.version
135-
oi.download_location = url if oi.download_location == '' else oi.download_location
136-
create_yaml_with_ossitem(oi, existed_yaml)
137-
with open(os.path.join(_output_dir, mf), 'w') as f:
138-
yaml.dump(existed_yaml, f, default_flow_style=False, sort_keys=False)
139-
140-
oss_total_list.extend(oss_list)
141-
142-
if oss_total_list != []:
143-
for oti in oss_total_list:
144-
create_yaml_with_ossitem(oti, yaml_dict)
145-
with open(os.path.join(_output_dir, final_report), 'w') as f:
146-
yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False)
147-
else:
148-
success = False
149-
err_msg = "Output file is not created as no oss items detected."
150-
except Exception as ex:
151-
err_msg = ex
152-
success = False
153-
154-
return success, err_msg
93+
def update_oss_item(scan_item, oss_name, oss_version, download_loc):
    """Apply default OSS information to the file items of a scan result.

    Every list found in ``scan_item.file_items.values()`` is walked, and each
    file item in it is handled in one of two ways:

    * If the file item already has OSS items, each OSS item whose ``name``,
      ``version`` and ``download_location`` are *all* empty strings receives
      the given defaults. An OSS item with any of the three already set is
      left untouched.
    * If the file item has no OSS items, one new ``OssItem`` built from the
      defaults is appended to it.

    Args:
        scan_item: Object whose ``file_items`` attribute is a mapping to
            lists of file items; each file item exposes an ``oss_items`` list.
        oss_name: Default OSS name.
        oss_version: Default OSS version.
        download_loc: Default download location.

    Returns:
        The same ``scan_item`` object; it is modified in place.
    """
    for file_items in scan_item.file_items.values():
        for file_item in file_items:
            if not file_item.oss_items:
                # NOTE(review): the diff rendering dropped indentation, so the
                # pairing of the original `else` is reconstructed. It is bound
                # to the emptiness check because binding it to the per-item
                # check would append to the list while iterating over it.
                # The third positional argument is passed as an empty string,
                # exactly as in the original call.
                file_item.oss_items.append(OssItem(oss_name, oss_version, '', download_loc))
                continue

            for oi in file_item.oss_items:
                # Only completely blank entries are filled; partially filled
                # entries keep whatever the scanner reported.
                if oi.name == '' and oi.version == '' and oi.download_location == '':
                    oi.name = oss_name
                    oi.version = oss_version
                    oi.download_location = download_loc
    return scan_item
155105

156106

157107
def create_scancodejson(final_report, output_extension, ui_mode_report, src_path=""):
@@ -199,73 +149,48 @@ def create_scancodejson(final_report, output_extension, ui_mode_report, src_path
199149
return success, err_msg
200150

201151

202-
def correct_scanner_result(_output_dir, output_files, output_extension, exist_src, exist_bin):
203-
src_oss_list = []
204-
bin_oss_list = []
152+
def correct_scanner_result(all_scan_item):
205153
duplicates = False
206154

207-
if exist_src:
208-
src_oss_list = check_exclude_dir(get_osslist(_output_dir, output_files['SRC'], output_extension, SRC_SHEET))
209-
if exist_bin:
210-
bin_oss_list = check_exclude_dir(get_osslist(_output_dir, output_files['BIN'], output_extension, BIN_SHEET))
211-
212-
if exist_src and exist_bin:
155+
keys_needed = {FOSSLIGHT_SOURCE, FOSSLIGHT_BINARY}
156+
is_contained = keys_needed.issubset(all_scan_item.file_items.keys())
157+
if is_contained:
158+
src_fileitems = all_scan_item.file_items[FOSSLIGHT_SOURCE]
159+
bin_fileitems = all_scan_item.file_items[FOSSLIGHT_BINARY]
213160
try:
214161
remove_src_idx_list = []
215-
for idx_src, src_item in enumerate(src_oss_list):
162+
for idx_src, src_fileitem in enumerate(src_fileitems):
163+
src_fileitem.exclude = check_exclude_dir(src_fileitem.source_name_or_path)
216164
dup_flag = False
217-
for bin_item in bin_oss_list:
218-
if (not src_item.source_name_or_path):
219-
continue
220-
if src_item.source_name_or_path[0] == bin_item.source_name_or_path[0]:
165+
for bin_fileitem in bin_fileitems:
166+
bin_fileitem.exclude = check_exclude_dir(bin_fileitem.source_name_or_path)
167+
if src_fileitem.source_name_or_path == bin_fileitem.source_name_or_path:
221168
dup_flag = True
222-
if not bin_item.license and src_item.license:
223-
src_item.exclude = bin_item.exclude
224-
bin_item.set_sheet_item(src_item.get_print_array(constant.FL_BINARY)[0])
225-
if bin_item.comment:
226-
bin_item.comment += '/'
227-
bin_item.comment += 'Loaded from SRC OSS info'
169+
src_all_licenses_non_empty = all(oss_item.license for oss_item in src_fileitem.oss_items)
170+
bin_empty_license_exists = all(not oss_item.license for oss_item in bin_fileitem.oss_items)
171+
172+
if src_all_licenses_non_empty and bin_empty_license_exists:
173+
exclude = bin_fileitem.oss_items[0].exclude
174+
bin_fileitem.oss_items = []
175+
for src_oss_item in src_fileitem.oss_items:
176+
src_oss_item.exclude = exclude
177+
bin_fileitem.oss_items.append(src_oss_item)
178+
bin_fileitem.comment = 'Loaded from SRC OSS info'
228179
if dup_flag:
229180
remove_src_idx_list.append(idx_src)
230181
if remove_src_idx_list:
231182
duplicates = True
232183
for i in sorted(remove_src_idx_list, reverse=True):
233-
del src_oss_list[i]
184+
del src_fileitems[i]
234185
except Exception as ex:
235186
logger.warning(f"correct the scanner result:{ex}")
236187

237188
try:
238-
if exist_src:
239-
success, err_msg = write_output_with_osslist(src_oss_list, _output_dir, output_files['SRC'],
240-
output_extension, SRC_SHEET)
241-
if not success:
242-
logger.warning(err_msg)
243-
if exist_bin:
244-
success, err_msg = write_output_with_osslist(bin_oss_list, _output_dir, output_files['BIN'],
245-
output_extension, BIN_SHEET, BIN_EXT_HEADER, BIN_HIDDEN_HEADER)
246-
if not success:
247-
logger.warning(err_msg)
248189
if duplicates:
249190
logger.info('Success to correct the src/bin scanner result')
250191
except Exception as ex:
251192
logger.warning(f"Corrected src/bin scanner result:{ex}")
252-
return
253-
254-
255-
def write_output_with_osslist(oss_list, output_dir, output_file, output_extension, sheetname, extended_hdr={}, hidden_hdr={}):
256-
new_oss_list = []
257-
sheet_list = {}
258-
sheet_list[sheetname] = []
259-
260-
for src_item in oss_list:
261-
scanner_name = constant.supported_sheet_and_scanner[sheetname]
262-
new_oss_list.append(src_item.get_print_array(scanner_name)[0])
263-
sheet_list[sheetname].extend(new_oss_list)
264-
if os.path.exists(os.path.join(output_dir, output_file)):
265-
os.remove(os.path.join(output_dir, output_file))
266-
success, err_msg, _ = write_output_file(os.path.join(output_dir, output_file).rstrip(output_extension),
267-
output_extension, sheet_list, extended_hdr, hidden_hdr)
268-
return success, err_msg
193+
return all_scan_item
269194

270195

271196
def get_osslist(_output_dir, output_file, output_extension, sheet_name=''):
@@ -285,14 +210,12 @@ def get_osslist(_output_dir, output_file, output_extension, sheet_name=''):
285210
return oss_list
286211

287212

288-
def check_exclude_dir(oss_list):
213+
def check_exclude_dir(source_name_or_path):
289214
_exclude_dirs = ["venv", "node_modules", "Pods", "Carthage"]
215+
exclude = False
290216

291-
for oss_item in oss_list:
292-
if not oss_item.source_name_or_path:
293-
continue
294-
for exclude_dir in _exclude_dirs:
295-
if exclude_dir in oss_item.source_name_or_path[0].split(os.path.sep):
296-
oss_item.exclude = True
297-
break
298-
return oss_list
217+
for exclude_dir in _exclude_dirs:
218+
if exclude_dir in source_name_or_path.split(os.path.sep):
219+
exclude = True
220+
break
221+
return exclude

src/fosslight_scanner/fosslight_scanner.py

Lines changed: 39 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,13 @@
2525
import fosslight_util.constant as constant
2626
from fosslight_util.output_format import check_output_format
2727
from fosslight_prechecker._precheck import run_lint as prechecker_lint
28-
from fosslight_util.write_excel import merge_excels, merge_cover_comment
2928
from fosslight_util.cover import CoverItem
29+
from fosslight_util.oss_item import ScannerItem
30+
from fosslight_util.output_format import write_output_file
3031

3132
from .common import (
32-
copy_file, call_analysis_api, overwrite_excel,
33-
merge_yamls, correct_scanner_result, create_scancodejson
33+
copy_file, call_analysis_api, update_oss_item,
34+
correct_scanner_result, create_scancodejson
3435
)
3536
from ._run_compare import run_compare
3637

@@ -56,7 +57,7 @@
5657

5758

5859
def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_exclude=[]):
59-
result_list = []
60+
result = []
6061

6162
package_manager = ""
6263
pip_activate_cmd = ""
@@ -90,7 +91,7 @@ def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_ex
9091
timer.start()
9192

9293
try:
93-
success, result = call_analysis_api(
94+
success, scan_item = call_analysis_api(
9495
path_to_analyze, "Dependency Analysis",
9596
1, run_dependency_scanner,
9697
package_manager,
@@ -101,11 +102,11 @@ def run_dependency(path_to_analyze, output_file_with_path, params="", path_to_ex
101102
github_token, path_to_exclude=path_to_exclude
102103
)
103104
if success:
104-
result_list = result.get('SRC_FL_Dependency')
105+
result = scan_item
105106
except Exception as ex:
106107
logger.warning(f"Run dependency: {ex}")
107108

108-
return result_list or []
109+
return result
109110

110111

111112
def source_analysis_wrapper(*args, **kwargs):
@@ -130,7 +131,8 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
130131
source_time_out=120, binary_simple=False):
131132
final_excel_dir = output_path
132133
success = True
133-
temp_output_fiiles = []
134+
all_cover_items = []
135+
all_scan_item = ScannerItem(PKG_NAME, _start_time)
134136
if not remove_src_data:
135137
success, final_excel_dir, result_log = init(output_path)
136138

@@ -158,9 +160,6 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
158160
-1, prechecker_lint,
159161
abs_path, False, output_prechecker,
160162
exclude_path=path_to_exclude)
161-
success_file, copied_file = copy_file(output_prechecker, output_path)
162-
if success_file:
163-
temp_output_fiiles.append(copied_file)
164163

165164
if run_src:
166165
try:
@@ -180,6 +179,9 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
180179
source_print_matched_text=source_print_matched_text,
181180
source_time_out=source_time_out
182181
)
182+
if success:
183+
all_scan_item.file_items.update(result[2].file_items)
184+
all_cover_items.append(result[2].cover)
183185

184186
else: # Run fosslight_source by using docker image
185187
src_output = os.path.join("output", output_files["SRC"])
@@ -195,16 +197,22 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
195197
logger.warning(f"Failed to run source analysis: {ex}")
196198

197199
if run_bin:
198-
success, _ = call_analysis_api(src_path, "Binary Analysis",
199-
1, binary_analysis.find_binaries,
200-
abs_path,
201-
os.path.join(_output_dir, output_files["BIN"]),
202-
"", db_url, binary_simple,
203-
correct_mode, correct_fpath,
204-
path_to_exclude=path_to_exclude)
200+
success, result = call_analysis_api(src_path, "Binary Analysis",
201+
1, binary_analysis.find_binaries,
202+
abs_path,
203+
os.path.join(_output_dir, output_files["BIN"]),
204+
"", db_url, binary_simple,
205+
correct_mode, correct_fpath,
206+
path_to_exclude=path_to_exclude)
207+
if success:
208+
all_scan_item.file_items.update(result.file_items)
209+
all_cover_items.append(result.cover)
205210

206211
if run_dep:
207-
run_dependency(src_path, os.path.join(_output_dir, output_files["DEP"]), dep_arguments, path_to_exclude)
212+
dep_scanitem = run_dependency(src_path, os.path.join(_output_dir, output_files["DEP"]),
213+
dep_arguments, path_to_exclude)
214+
all_scan_item.file_items.update(dep_scanitem.file_items)
215+
all_cover_items.append(dep_scanitem.cover)
208216

209217
else:
210218
return
@@ -215,46 +223,23 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
215223
try:
216224
output_file_without_ext = os.path.join(final_excel_dir, output_file)
217225
final_report = f"{output_file_without_ext}{output_extension}"
218-
merge_files = [output_files["SRC"], output_files["BIN"], output_files["DEP"]]
219226
cover = CoverItem(tool_name=PKG_NAME,
220227
start_time=_start_time,
221228
input_path=abs_path,
222229
exclude_path=path_to_exclude,
223230
simple_mode=False)
224-
cover.comment = merge_cover_comment(_output_dir, merge_files)
225-
226-
if output_extension == ".xlsx":
227-
tmp_dir = f"tmp_{datetime.now().strftime('%y%m%d_%H%M')}"
228-
exist_src = False
229-
exist_bin = False
230-
if correct_mode:
231-
os.makedirs(os.path.join(_output_dir, tmp_dir), exist_ok=True)
232-
if os.path.exists(os.path.join(_output_dir, output_files['SRC'])):
233-
exist_src = True
234-
shutil.copy2(os.path.join(_output_dir, output_files['SRC']), os.path.join(_output_dir, tmp_dir))
235-
if os.path.exists(os.path.join(_output_dir, output_files['BIN'])):
236-
exist_bin = True
237-
shutil.copy2(os.path.join(_output_dir, output_files['BIN']), os.path.join(_output_dir, tmp_dir))
238-
if exist_src or exist_bin:
239-
correct_scanner_result(_output_dir, output_files, output_extension, exist_src, exist_bin)
240-
241-
if remove_src_data:
242-
overwrite_excel(_output_dir, default_oss_name, "OSS Name")
243-
overwrite_excel(_output_dir, default_oss_version, "OSS Version")
244-
overwrite_excel(_output_dir, url, "Download Location")
245-
success, err_msg = merge_excels(_output_dir, final_report, merge_files, cover)
246-
247-
if correct_mode:
248-
if exist_src:
249-
shutil.move(os.path.join(_output_dir, tmp_dir, output_files['SRC']),
250-
os.path.join(_output_dir, output_files['SRC']))
251-
if exist_bin:
252-
shutil.move(os.path.join(_output_dir, tmp_dir, output_files['BIN']),
253-
os.path.join(_output_dir, output_files['BIN']))
254-
shutil.rmtree(os.path.join(_output_dir, tmp_dir), ignore_errors=True)
255-
elif output_extension == ".yaml":
256-
success, err_msg = merge_yamls(_output_dir, merge_files, final_report,
257-
remove_src_data, default_oss_name, default_oss_version, url)
231+
merge_comment = []
232+
for ci in all_cover_items:
233+
merge_comment.append(str(f'[{ci.tool_name}] {ci.comment}'))
234+
cover.comment = '\n'.join(merge_comment)
235+
all_scan_item.cover = cover
236+
237+
if correct_mode:
238+
all_scan_item = correct_scanner_result(all_scan_item)
239+
240+
if remove_src_data:
241+
all_scan_item = update_oss_item(all_scan_item, default_oss_name, default_oss_version, url)
242+
success, err_msg, final_report = write_output_file(output_file_without_ext, output_extension, all_scan_item)
258243
if success:
259244
if os.path.isfile(final_report):
260245
logger.info(f'Generated the result file: {final_report}')

0 commit comments

Comments
 (0)