Skip to content

Commit ded0360

Browse files
committed
Correct the source/bin scanner result
Signed-off-by: Jiyeong Seok <[email protected]>
1 parent 1d9d7a4 commit ded0360

File tree

4 files changed

+144
-16
lines changed

4 files changed

+144
-16
lines changed

src/fosslight_scanner/_help.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
-r\t\t\t Keep raw data
3434
-t\t\t\t Hide the progress bar
3535
-v\t\t\t Print FOSSLight Scanner version
36+
--no_correction\t Enter if you don't want to correct OSS information with sbom-info.yaml
37+
--correct_fpath <path> Path to the sbom-info.yaml file
3638
3739
Options for only 'all' or 'bin' mode
3840
-u <db_url>\t\t DB Connection(format :'postgresql://username:password@host:port/database_name')

src/fosslight_scanner/cli.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,14 @@ def main():
2121
parser.add_argument('--dependency', '-d', help='Dependency arguments', type=str, dest='dep_argument', default="")
2222
parser.add_argument('--url', '-u', help="DB Url", type=str, dest='db_url', default="")
2323
parser.add_argument('--core', '-c', help='Number of processes to analyze source', type=int, dest='core', default=-1)
24-
parser.add_argument('--raw', '-r', help='Keep raw data', action='store_true', dest='raw', default=False)
25-
parser.add_argument('--timer', '-t', help='Hide the progress bar', action='store_true', dest='timer', default=False)
26-
parser.add_argument('--version', '-v', help='Print version', action='store_true', dest='version', default=False)
24+
parser.add_argument('--raw', '-r', help='Keep raw data', action='store_true', dest='raw', default=False)
25+
parser.add_argument('--timer', '-t', help='Hide the progress bar', action='store_true', dest='timer', default=False)
26+
parser.add_argument('--version', '-v', help='Print version', action='store_true', dest='version', default=False)
2727
parser.add_argument('--help', '-h', help='Print help message', action='store_true', dest='help')
28+
parser.add_argument('--no_correction', help='No correction with sbom-info.yaml',
29+
action='store_true', required=False, default=False)
30+
parser.add_argument('--correct_fpath', help='Path to the sbom-info.yaml',
31+
nargs=1, type=str, required=False, default='')
2832

2933
try:
3034
args = parser.parse_args()
@@ -37,7 +41,8 @@ def main():
3741
print_package_version(PKG_NAME, "FOSSLight Scanner Version:")
3842
else:
3943
run_main(args.mode, args.path, args.dep_argument, args.output, args.file,
40-
args.link, args.db_url, args.timer, args.raw, args.core)
44+
args.link, args.db_url, args.timer, args.raw, args.core,
45+
args.no_correction, args.correct_fpath)
4146

4247

4348
if __name__ == "__main__":

src/fosslight_scanner/common.py

Lines changed: 100 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,26 @@
66
import os
77
import sys
88
import logging
9-
from shutil import copy
9+
import shutil
1010
import re
1111
import pandas as pd
1212
import yaml
13+
import copy
1314
import fosslight_util.constant as constant
1415
from fosslight_util.parsing_yaml import parsing_yml
1516
from fosslight_util.write_yaml import create_yaml_with_ossitem
17+
from fosslight_util.read_excel import read_oss_report
18+
from fosslight_util.output_format import write_output_file
1619

1720
logger = logging.getLogger(constant.LOGGER_NAME)
21+
SRC_SHEET = 'SRC_FL_Source'
22+
BIN_SHEET = 'BIN_FL_Binary'
1823

1924

2025
def copy_file(source, destination):
2126
copied_file = ""
2227
try:
23-
copy(source, destination)
28+
shutil.copy(source, destination)
2429
if os.path.isdir(destination):
2530
copied_file = os.path.join(destination, os.path.basename(source))
2631
else:
@@ -194,3 +199,96 @@ def merge_yamls(_output_dir, merge_yaml_files, final_report, remove_src_data=Fal
194199
success = False
195200

196201
return success, err_msg
202+
203+
204+
def correct_scanner_result(_output_dir, output_files):
    """Reconcile the source and binary scanner reports and rewrite both.

    Both reports are loaded from ``_output_dir`` and passed through
    ``check_exclude_dir``. Every SRC item whose first path equals the first
    path of a BIN item is then marked as excluded. When the matching BIN item
    has no license, a copy of it is filled from the SRC row and appended to
    the BIN list, and the original BIN row is excluded in its favour.

    Failures are logged as warnings and never propagated, so the caller can
    continue with the uncorrected reports.

    NOTE(review): the block nesting below was reconstructed from a diff view
    that had lost its indentation - confirm against the repository.

    :param _output_dir: directory holding the scanner report files.
    :param output_files: mapping with at least the keys ``'SRC'`` and
        ``'BIN'`` giving the report file names.
    :return: None
    """
    try:
        src_oss_list = check_exclude_dir(get_osslist_with_xlsx(_output_dir, output_files['SRC'], SRC_SHEET))
        bin_oss_list = check_exclude_dir(get_osslist_with_xlsx(_output_dir, output_files['BIN'], BIN_SHEET))

        duplicates = False
        dup_bin_list = []
        exclude_indexes = set()
        for src_item in src_oss_list:
            # An item without a path cannot match anything; checking once per
            # SRC item instead of once per (SRC, BIN) pair.
            if not src_item.source_name_or_path:
                continue
            src_path = src_item.source_name_or_path[0]

            found_in_bin = False
            for idx, bin_item in enumerate(bin_oss_list):
                # Guard the BIN side as well: indexing an empty path list
                # would raise and abort the whole correction.
                if not bin_item.source_name_or_path:
                    continue
                if src_path != bin_item.source_name_or_path[0]:
                    continue

                found_in_bin = True
                if bin_item.license:
                    # The binary scanner already knows the license; keep it.
                    continue

                dup_bin = copy.deepcopy(bin_item)
                if dup_bin.exclude:
                    # Must happen before get_print_array() so the row handed
                    # to the copy is built from the updated SRC item.
                    src_item.exclude = dup_bin.exclude
                dup_bin.set_sheet_item(src_item.get_print_array()[0])
                if dup_bin.comment:
                    dup_bin.comment += '/'
                dup_bin.comment += 'Loaded from SRC OSS info'
                dup_bin_list.append(dup_bin)
                exclude_indexes.add(idx)

            if found_in_bin:
                # Applied after the inner loop so every copy above is built
                # from the SRC item before it is force-excluded.
                src_item.exclude = True
                if src_item.comment:
                    src_item.comment += '/'
                src_item.comment += 'Excluded by duplicated binary within BIN'
                duplicates = True

        # Exclude each superseded BIN row exactly once, even if several SRC
        # items matched it.
        for idx in sorted(exclude_indexes):
            superseded = bin_oss_list[idx]
            superseded.exclude = True
            if superseded.comment:
                superseded.comment += '/'
            superseded.comment += 'Excluded by SRC OSS info'
        bin_oss_list.extend(dup_bin_list)

        success, err_msg = write_xlsx_with_osslist(src_oss_list, _output_dir, output_files['SRC'], SRC_SHEET)
        if not success:
            logger.warning(err_msg)
        success, err_msg = write_xlsx_with_osslist(bin_oss_list, _output_dir, output_files['BIN'], BIN_SHEET)
        if not success:
            logger.warning(err_msg)
        if duplicates:
            logger.info('Success to correct the src/bin scanner result')
    except Exception as ex:
        # Best effort: a failed correction must not stop the scanner run.
        logger.warning(f"correct the scanner result:{ex}")
259+
260+
261+
def write_xlsx_with_osslist(oss_list, output_dir, output_file, sheetname):
    """Write ``oss_list`` as a single-sheet xlsx report, replacing any old file.

    The first row returned by each item's ``get_print_array()`` becomes one
    row of the sheet named ``sheetname``. An existing file at
    ``output_dir/output_file`` is removed first. ``write_output_file`` is
    called, as before, with the extension-less path and ``'.xlsx'`` as
    separate arguments.

    :param oss_list: items exposing ``get_print_array()``.
    :param output_dir: directory of the report.
    :param output_file: report file name, normally ending in ``.xlsx``.
    :param sheetname: name of the sheet to write.
    :return: ``(success, err_msg)`` as reported by ``write_output_file``.
    """
    rows = [oss_item.get_print_array()[0] for oss_item in oss_list]
    sheet_list = {sheetname: rows}

    output_path = os.path.join(output_dir, output_file)
    if os.path.exists(output_path):
        os.remove(output_path)

    # str.rstrip('xlsx') strips trailing *characters* from the set {x, l, s},
    # not the suffix: 'name.xlsx' became 'name.' (note the dangling dot).
    # Remove the real extension instead.
    base_path, extension = os.path.splitext(output_path)
    path_without_ext = base_path if extension == '.xlsx' else output_path

    success, err_msg, _ = write_output_file(path_without_ext, '.xlsx', sheet_list)
    return success, err_msg
274+
275+
276+
def get_osslist_with_xlsx(_output_dir, output_file, sheet_name):
    """Load the OSS items of one sheet from a scanner report.

    :param _output_dir: directory holding the report.
    :param output_file: report file name inside ``_output_dir``.
    :param sheet_name: sheet passed on to ``read_oss_report``.
    :return: a new list with the items returned by ``read_oss_report``, or an
        empty list when the report is not an existing regular file.
    """
    oss_xlsx = os.path.join(_output_dir, output_file)

    # isfile() rather than exists(): an empty file name resolves to the
    # directory itself, which exists but is not a readable report.
    if not os.path.isfile(oss_xlsx):
        return []
    return list(read_oss_report(oss_xlsx, sheet_name))
284+
285+
286+
def check_exclude_dir(oss_list):
    """Mark OSS items located under well-known package directories as excluded.

    An item gets ``exclude = True`` when any component of its first source
    path is exactly ``venv``, ``node_modules``, ``Pods`` or ``Carthage``.
    Items without a source path are left untouched, and an item that is
    already excluded is never reset.

    :param oss_list: items exposing ``source_name_or_path`` (a sequence of
        path strings) and a writable ``exclude`` attribute.
    :return: the same list object; items are updated in place.
    """
    exclude_dirs = {"venv", "node_modules", "Pods", "Carthage"}

    for oss_item in oss_list:
        paths = oss_item.source_name_or_path
        if not paths:
            # No path to inspect; indexing [0] would raise IndexError.
            continue

        first_path = paths[0]
        if os.path.altsep:
            # Where a second separator exists (e.g. '/' on Windows), treat it
            # like the primary one so both spellings of a path are matched.
            first_path = first_path.replace(os.path.altsep, os.path.sep)

        if exclude_dirs.intersection(first_path.split(os.path.sep)):
            oss_item.exclude = True
    return oss_list

src/fosslight_scanner/fosslight_scanner.py

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
import re
1010
import yaml
1111
import sys
12+
import shutil
1213
from pathlib import Path
13-
from shutil import rmtree as rmdir
1414
from datetime import datetime
1515
from fosslight_binary import binary_analysis
1616
from fosslight_dependency.run_dependency_scanner import run_dependency_scanner
@@ -23,7 +23,7 @@
2323
from fosslight_prechecker._precheck import run_lint as prechecker_lint
2424
from .common import (copy_file, call_analysis_api,
2525
overwrite_excel, extract_name_from_link,
26-
merge_yamls)
26+
merge_yamls, correct_scanner_result)
2727
from fosslight_util.write_excel import merge_excels
2828
from ._run_compare import run_compare
2929
import subprocess
@@ -102,7 +102,8 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
102102
run_src=True, run_bin=True, run_dep=True, run_prechecker=True,
103103
remove_src_data=True, result_log={}, output_file="",
104104
output_extension="", num_cores=-1, db_url="",
105-
default_oss_name="", url=""):
105+
default_oss_name="", url="",
106+
correct_mode=True, correct_fpath=""):
106107
final_excel_dir = output_path
107108
success = True
108109
temp_output_fiiles = []
@@ -115,6 +116,9 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
115116
if output_extension == "":
116117
output_extension = ".xlsx"
117118

119+
if not correct_fpath:
120+
correct_fpath = src_path
121+
118122
try:
119123
sheet_list = {}
120124
final_excel_dir = os.path.abspath(final_excel_dir)
@@ -149,7 +153,8 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
149153
sheet_list["SRC_FL_Source"] = [scan_item.get_row_to_print() for scan_item in result[2]]
150154
need_license = True if output_extension == ".xlsx" else False
151155
create_report_file(0, result[2], result[3], 'all', need_license,
152-
_output_dir, output_files["SRC"].split('.')[0], output_extension)
156+
_output_dir, output_files["SRC"].split('.')[0], output_extension,
157+
correct_mode, correct_fpath, abs_path)
153158
else: # Run fosslight_source by using docker image
154159
src_output = os.path.join("output", output_files["SRC"])
155160
output_rel_path = os.path.relpath(abs_path, os.getcwd())
@@ -166,7 +171,8 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
166171
1, binary_analysis.find_binaries,
167172
abs_path,
168173
os.path.join(_output_dir, output_files["BIN"]),
169-
"", db_url)
174+
"", db_url, False,
175+
correct_mode, correct_fpath)
170176
if success:
171177
output_binary_txt_raw = f"{output_files['BIN'].split('.')[0]}.txt"
172178
success_file, copied_file = copy_file(os.path.join(_output_dir, output_binary_txt_raw),
@@ -186,6 +192,12 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
186192
try:
187193
output_file_without_ext = os.path.join(final_excel_dir, output_file)
188194
final_report = f"{output_file_without_ext}{output_extension}"
195+
tmp_dir = 'tmp'
196+
if correct_mode:
197+
os.makedirs(os.path.join(_output_dir, tmp_dir), exist_ok=True)
198+
shutil.copy2(os.path.join(_output_dir, output_files['SRC']), os.path.join(_output_dir, tmp_dir))
199+
shutil.copy2(os.path.join(_output_dir, output_files['BIN']), os.path.join(_output_dir, tmp_dir))
200+
correct_scanner_result(_output_dir, output_files)
189201
if output_extension == ".xlsx":
190202
if remove_src_data:
191203
overwrite_excel(_output_dir, default_oss_name, "OSS Name")
@@ -195,7 +207,10 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
195207
merge_yaml_files = [output_files["SRC"], output_files["BIN"], output_files["DEP"]]
196208
success, err_msg = merge_yamls(_output_dir, merge_yaml_files, final_report,
197209
remove_src_data, default_oss_name, url)
198-
210+
if correct_mode:
211+
shutil.move(os.path.join(_output_dir, tmp_dir, output_files['SRC']), os.path.join(_output_dir, output_files['SRC']))
212+
shutil.move(os.path.join(_output_dir, tmp_dir, output_files['BIN']), os.path.join(_output_dir, output_files['BIN']))
213+
shutil.rmtree(os.path.join(_output_dir, tmp_dir), ignore_errors=False)
199214
if success:
200215
if os.path.isfile(final_report):
201216
result_log["Output File"] = final_report
@@ -212,7 +227,7 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
212227
try:
213228
if remove_src_data:
214229
logger.debug(f"Remove temporary source: {src_path}")
215-
rmdir(src_path)
230+
shutil.rmtree(src_path)
216231
except Exception as ex:
217232
logger.debug(f"Error to remove temp files:{ex}")
218233

@@ -267,13 +282,17 @@ def init(output_path="", make_outdir=True):
267282

268283

269284
def run_main(mode, path_arg, dep_arguments, output_file_or_dir, file_format, url_to_analyze, db_url,
270-
hide_progressbar=False, keep_raw_data=False, num_cores=-1):
285+
hide_progressbar=False, keep_raw_data=False, num_cores=-1, no_correction=False, correct_fpath=""):
271286
global _executed_path, _start_time
272287

273288
output_file = ""
274289
default_oss_name = ""
275290
src_path = ""
276291
_executed_path = os.getcwd()
292+
correct_mode = True
293+
294+
if no_correction:
295+
correct_mode = False
277296

278297
if mode == "compare":
279298
CUSTOMIZED_FORMAT = {'excel': '.xlsx', 'html': '.html', 'json': '.json', 'yaml': '.yaml'}
@@ -348,16 +367,20 @@ def run_main(mode, path_arg, dep_arguments, output_file_or_dir, file_format, url
348367
default_oss_name = extract_name_from_link(url_to_analyze)
349368
success, src_path = download_source(url_to_analyze, output_path)
350369

370+
if not correct_fpath:
371+
correct_fpath = src_path
372+
351373
if src_path != "":
352374
run_scanner(src_path, dep_arguments, output_path, keep_raw_data,
353375
run_src, run_bin, run_dep, run_prechecker,
354376
remove_downloaded_source, {}, output_file,
355377
output_extension, num_cores, db_url,
356-
default_oss_name, url_to_analyze)
378+
default_oss_name, url_to_analyze,
379+
correct_mode, correct_fpath)
357380
try:
358381
if not keep_raw_data:
359382
logger.debug(f"Remove temporary files: {_output_dir}")
360-
rmdir(_output_dir)
383+
shutil.rmtree(_output_dir)
361384
except Exception as ex:
362385
logger.debug(f"Error to remove temp files:{ex}")
363386
except Exception as ex:

0 commit comments

Comments
 (0)