Skip to content

Commit 6156494

Browse files
authored
Merge pull request #62 from fosslight/correct
Correct the source/bin scanner result
2 parents 1d9d7a4 + b2a40d2 commit 6156494

File tree

4 files changed

+157
-16
lines changed

4 files changed

+157
-16
lines changed

src/fosslight_scanner/_help.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
-r\t\t\t Keep raw data
3434
-t\t\t\t Hide the progress bar
3535
-v\t\t\t Print FOSSLight Scanner version
36+
--no_correction\t Enter if you don't want to correct OSS information with sbom-info.yaml
37+
--correct_fpath <path> Path to the sbom-info.yaml file
3638
3739
Options for only 'all' or 'bin' mode
3840
-u <db_url>\t\t DB Connection(format :'postgresql://username:password@host:port/database_name')

src/fosslight_scanner/cli.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,14 @@ def main():
2121
parser.add_argument('--dependency', '-d', help='Dependency arguments', type=str, dest='dep_argument', default="")
2222
parser.add_argument('--url', '-u', help="DB Url", type=str, dest='db_url', default="")
2323
parser.add_argument('--core', '-c', help='Number of processes to analyze source', type=int, dest='core', default=-1)
24-
parser.add_argument('--raw', '-r', help='Keep raw data', action='store_true', dest='raw', default=False)
25-
parser.add_argument('--timer', '-t', help='Hide the progress bar', action='store_true', dest='timer', default=False)
26-
parser.add_argument('--version', '-v', help='Print version', action='store_true', dest='version', default=False)
24+
parser.add_argument('--raw', '-r', help='Keep raw data', action='store_true', dest='raw', default=False)
25+
parser.add_argument('--timer', '-t', help='Hide the progress bar', action='store_true', dest='timer', default=False)
26+
parser.add_argument('--version', '-v', help='Print version', action='store_true', dest='version', default=False)
2727
parser.add_argument('--help', '-h', help='Print help message', action='store_true', dest='help')
28+
parser.add_argument('--no_correction', help='No correction with sbom-info.yaml',
29+
action='store_true', required=False, default=False)
30+
parser.add_argument('--correct_fpath', help='Path to the sbom-info.yaml',
31+
type=str, required=False, default='')
2832

2933
try:
3034
args = parser.parse_args()
@@ -37,7 +41,8 @@ def main():
3741
print_package_version(PKG_NAME, "FOSSLight Scanner Version:")
3842
else:
3943
run_main(args.mode, args.path, args.dep_argument, args.output, args.file,
40-
args.link, args.db_url, args.timer, args.raw, args.core)
44+
args.link, args.db_url, args.timer, args.raw, args.core,
45+
not args.no_correction, args.correct_fpath)
4146

4247

4348
if __name__ == "__main__":

src/fosslight_scanner/common.py

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,26 @@
66
import os
77
import sys
88
import logging
9-
from shutil import copy
9+
import shutil
1010
import re
1111
import pandas as pd
1212
import yaml
13+
import copy
1314
import fosslight_util.constant as constant
1415
from fosslight_util.parsing_yaml import parsing_yml
1516
from fosslight_util.write_yaml import create_yaml_with_ossitem
17+
from fosslight_util.read_excel import read_oss_report
18+
from fosslight_util.output_format import write_output_file
1619

1720
logger = logging.getLogger(constant.LOGGER_NAME)
21+
SRC_SHEET = 'SRC_FL_Source'
22+
BIN_SHEET = 'BIN_FL_Binary'
1823

1924

2025
def copy_file(source, destination):
2126
copied_file = ""
2227
try:
23-
copy(source, destination)
28+
shutil.copy(source, destination)
2429
if os.path.isdir(destination):
2530
copied_file = os.path.join(destination, os.path.basename(source))
2631
else:
@@ -194,3 +199,102 @@ def merge_yamls(_output_dir, merge_yaml_files, final_report, remove_src_data=Fal
194199
success = False
195200

196201
return success, err_msg
202+
203+
204+
def _append_comment(oss_item, text):
    """Append *text* to ``oss_item.comment``, using '/' as the separator."""
    oss_item.comment = f"{oss_item.comment}/{text}" if oss_item.comment else text


def correct_scanner_result(_output_dir, output_files, exist_src, exist_bin):
    """Reconcile the SRC and BIN scanner reports and rewrite both files.

    Both reports are loaded from ``_output_dir`` and passed through
    ``check_exclude_dir``.  When both exist, every SRC row whose first path
    equals the first path of a BIN row is marked as excluded.  If the
    matching BIN row has no license, a copy of it is filled from the SRC row
    and appended to the BIN list, and the original BIN row is excluded.

    NOTE(review): the block nesting below was reconstructed from a flattened
    diff view -- confirm against the repository before merging.

    Args:
        _output_dir: Directory holding the intermediate scanner reports.
        output_files: Mapping that provides the 'SRC' and 'BIN' file names.
        exist_src: True when the SRC report is present and should be handled.
        exist_bin: True when the BIN report is present and should be handled.

    Returns:
        None.  Failures are logged as warnings instead of being raised.
    """
    src_oss_list = []
    bin_oss_list = []
    duplicates = False

    if exist_src:
        src_oss_list = check_exclude_dir(get_osslist_with_xlsx(_output_dir, output_files['SRC'], SRC_SHEET))
    if exist_bin:
        bin_oss_list = check_exclude_dir(get_osslist_with_xlsx(_output_dir, output_files['BIN'], BIN_SHEET))

    if exist_src and exist_bin:
        try:
            dup_bin_list = []
            exclude_indexes = set()
            for src_item in src_oss_list:
                # A SRC row without a path can never match; skip it once
                # here instead of re-testing it for every BIN row.
                if not src_item.source_name_or_path:
                    continue
                src_path = src_item.source_name_or_path[0]
                dup_bin = None
                for idx, bin_item in enumerate(bin_oss_list):
                    # Guard the BIN side as well: one row without a path used
                    # to raise IndexError and abort the whole correction.
                    if not bin_item.source_name_or_path:
                        continue
                    if src_path != bin_item.source_name_or_path[0]:
                        continue
                    dup_bin = copy.deepcopy(bin_item)
                    if not dup_bin.license:
                        if dup_bin.exclude:
                            src_item.exclude = dup_bin.exclude
                        # Fill the license-less BIN copy from the SRC row.
                        dup_bin.set_sheet_item(src_item.get_print_array()[0])
                        _append_comment(dup_bin, 'Loaded from SRC OSS info')
                        dup_bin_list.append(dup_bin)
                        exclude_indexes.add(idx)
                if dup_bin is not None:
                    src_item.exclude = True
                    _append_comment(src_item, 'Excluded by duplicated binary within BIN')
                    duplicates = True

            # Exclude each superseded BIN row once, then add the merged copies.
            for idx in exclude_indexes:
                bin_oss_list[idx].exclude = True
                _append_comment(bin_oss_list[idx], 'Excluded by SRC OSS info')
            bin_oss_list.extend(dup_bin_list)
        except Exception as ex:
            logger.warning(f"correct the scanner result:{ex}")

    try:
        if exist_src:
            success, err_msg = write_xlsx_with_osslist(src_oss_list, _output_dir, output_files['SRC'], SRC_SHEET)
            if not success:
                logger.warning(err_msg)
        if exist_bin:
            success, err_msg = write_xlsx_with_osslist(bin_oss_list, _output_dir, output_files['BIN'], BIN_SHEET)
            if not success:
                logger.warning(err_msg)
        if duplicates:
            logger.info('Success to correct the src/bin scanner result')
    except Exception as ex:
        logger.warning(f"Corrected src/bin scanner result:{ex}")
265+
266+
267+
def write_xlsx_with_osslist(oss_list, output_dir, output_file, sheetname):
    """Rewrite ``output_file`` in ``output_dir`` as a one-sheet xlsx report.

    Args:
        oss_list: Items exposing ``get_print_array()``; the first element of
            each returned array becomes one row of the sheet.
        output_dir: Directory that contains (and will receive) the report.
        output_file: Report file name, including its extension.
        sheetname: Name of the sheet that receives the rows.

    Returns:
        The ``(success, err_msg)`` pair reported by ``write_output_file``.
    """
    sheet_list = {sheetname: [oss_item.get_print_array()[0] for oss_item in oss_list]}

    output_path = os.path.join(output_dir, output_file)
    if os.path.exists(output_path):
        os.remove(output_path)

    # str.rstrip('xlsx') strips a *set of characters*, not a suffix: it left
    # the trailing dot behind ("a.xlsx" -> "a.") and could eat stem letters.
    # splitext removes exactly the extension.
    output_without_ext = os.path.splitext(output_path)[0]
    success, err_msg, _ = write_output_file(output_without_ext, '.xlsx', sheet_list)
    return success, err_msg
280+
281+
282+
def get_osslist_with_xlsx(_output_dir, output_file, sheet_name):
    """Load the OSS items of one sheet from a report file.

    Args:
        _output_dir: Directory that contains the report.
        output_file: Report file name inside ``_output_dir``.
        sheet_name: Sheet to read through ``read_oss_report``.

    Returns:
        A list with the items read from the sheet, or an empty list when the
        report file does not exist.
    """
    report_path = os.path.join(_output_dir, output_file)
    if not os.path.exists(report_path):
        return []
    return list(read_oss_report(report_path, sheet_name))
290+
291+
292+
def check_exclude_dir(oss_list):
    """Mark items located under well-known dependency directories as excluded.

    An item is excluded when any component of its first path (split on
    ``os.path.sep``) is one of "venv", "node_modules", "Pods" or "Carthage".
    Items without a path are left untouched.

    Args:
        oss_list: Items exposing ``source_name_or_path`` (a sequence of
            paths) and a writable ``exclude`` attribute.

    Returns:
        The same list object, with ``exclude`` set to True on matching items.
    """
    _exclude_dirs = {"venv", "node_modules", "Pods", "Carthage"}

    for oss_item in oss_list:
        # Rows without any path used to raise IndexError on the [0] lookup.
        if not oss_item.source_name_or_path:
            continue
        path_parts = oss_item.source_name_or_path[0].split(os.path.sep)
        if not _exclude_dirs.isdisjoint(path_parts):
            oss_item.exclude = True
    return oss_list

src/fosslight_scanner/fosslight_scanner.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@
99
import re
1010
import yaml
1111
import sys
12+
import shutil
1213
from pathlib import Path
13-
from shutil import rmtree as rmdir
1414
from datetime import datetime
1515
from fosslight_binary import binary_analysis
1616
from fosslight_dependency.run_dependency_scanner import run_dependency_scanner
@@ -23,7 +23,7 @@
2323
from fosslight_prechecker._precheck import run_lint as prechecker_lint
2424
from .common import (copy_file, call_analysis_api,
2525
overwrite_excel, extract_name_from_link,
26-
merge_yamls)
26+
merge_yamls, correct_scanner_result)
2727
from fosslight_util.write_excel import merge_excels
2828
from ._run_compare import run_compare
2929
import subprocess
@@ -102,7 +102,8 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
102102
run_src=True, run_bin=True, run_dep=True, run_prechecker=True,
103103
remove_src_data=True, result_log={}, output_file="",
104104
output_extension="", num_cores=-1, db_url="",
105-
default_oss_name="", url=""):
105+
default_oss_name="", url="",
106+
correct_mode=True, correct_fpath=""):
106107
final_excel_dir = output_path
107108
success = True
108109
temp_output_fiiles = []
@@ -115,6 +116,9 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
115116
if output_extension == "":
116117
output_extension = ".xlsx"
117118

119+
if not correct_fpath:
120+
correct_fpath = src_path
121+
118122
try:
119123
sheet_list = {}
120124
final_excel_dir = os.path.abspath(final_excel_dir)
@@ -149,7 +153,8 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
149153
sheet_list["SRC_FL_Source"] = [scan_item.get_row_to_print() for scan_item in result[2]]
150154
need_license = True if output_extension == ".xlsx" else False
151155
create_report_file(0, result[2], result[3], 'all', need_license,
152-
_output_dir, output_files["SRC"].split('.')[0], output_extension)
156+
_output_dir, output_files["SRC"].split('.')[0], output_extension,
157+
correct_mode, correct_fpath, abs_path)
153158
else: # Run fosslight_source by using docker image
154159
src_output = os.path.join("output", output_files["SRC"])
155160
output_rel_path = os.path.relpath(abs_path, os.getcwd())
@@ -166,7 +171,8 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
166171
1, binary_analysis.find_binaries,
167172
abs_path,
168173
os.path.join(_output_dir, output_files["BIN"]),
169-
"", db_url)
174+
"", db_url, False,
175+
correct_mode, correct_fpath)
170176
if success:
171177
output_binary_txt_raw = f"{output_files['BIN'].split('.')[0]}.txt"
172178
success_file, copied_file = copy_file(os.path.join(_output_dir, output_binary_txt_raw),
@@ -186,6 +192,19 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
186192
try:
187193
output_file_without_ext = os.path.join(final_excel_dir, output_file)
188194
final_report = f"{output_file_without_ext}{output_extension}"
195+
tmp_dir = f"tmp_{datetime.now().strftime('%y%m%d_%H%M')}"
196+
exist_src = False
197+
exist_bin = False
198+
if correct_mode:
199+
os.makedirs(os.path.join(_output_dir, tmp_dir), exist_ok=True)
200+
if os.path.exists(os.path.join(_output_dir, output_files['SRC'])):
201+
exist_src = True
202+
shutil.copy2(os.path.join(_output_dir, output_files['SRC']), os.path.join(_output_dir, tmp_dir))
203+
if os.path.exists(os.path.join(_output_dir, output_files['BIN'])):
204+
exist_bin = True
205+
shutil.copy2(os.path.join(_output_dir, output_files['BIN']), os.path.join(_output_dir, tmp_dir))
206+
if exist_src or exist_bin:
207+
correct_scanner_result(_output_dir, output_files, exist_src, exist_bin)
189208
if output_extension == ".xlsx":
190209
if remove_src_data:
191210
overwrite_excel(_output_dir, default_oss_name, "OSS Name")
@@ -195,7 +214,14 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
195214
merge_yaml_files = [output_files["SRC"], output_files["BIN"], output_files["DEP"]]
196215
success, err_msg = merge_yamls(_output_dir, merge_yaml_files, final_report,
197216
remove_src_data, default_oss_name, url)
198-
217+
if correct_mode:
218+
if exist_src:
219+
shutil.move(os.path.join(_output_dir, tmp_dir, output_files['SRC']),
220+
os.path.join(_output_dir, output_files['SRC']))
221+
if exist_bin:
222+
shutil.move(os.path.join(_output_dir, tmp_dir, output_files['BIN']),
223+
os.path.join(_output_dir, output_files['BIN']))
224+
shutil.rmtree(os.path.join(_output_dir, tmp_dir), ignore_errors=True)
199225
if success:
200226
if os.path.isfile(final_report):
201227
result_log["Output File"] = final_report
@@ -212,7 +238,7 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
212238
try:
213239
if remove_src_data:
214240
logger.debug(f"Remove temporary source: {src_path}")
215-
rmdir(src_path)
241+
shutil.rmtree(src_path)
216242
except Exception as ex:
217243
logger.debug(f"Error to remove temp files:{ex}")
218244

@@ -267,7 +293,7 @@ def init(output_path="", make_outdir=True):
267293

268294

269295
def run_main(mode, path_arg, dep_arguments, output_file_or_dir, file_format, url_to_analyze, db_url,
270-
hide_progressbar=False, keep_raw_data=False, num_cores=-1):
296+
hide_progressbar=False, keep_raw_data=False, num_cores=-1, correct_mode=True, correct_fpath=""):
271297
global _executed_path, _start_time
272298

273299
output_file = ""
@@ -348,16 +374,20 @@ def run_main(mode, path_arg, dep_arguments, output_file_or_dir, file_format, url
348374
default_oss_name = extract_name_from_link(url_to_analyze)
349375
success, src_path = download_source(url_to_analyze, output_path)
350376

377+
if not correct_fpath:
378+
correct_fpath = src_path
379+
351380
if src_path != "":
352381
run_scanner(src_path, dep_arguments, output_path, keep_raw_data,
353382
run_src, run_bin, run_dep, run_prechecker,
354383
remove_downloaded_source, {}, output_file,
355384
output_extension, num_cores, db_url,
356-
default_oss_name, url_to_analyze)
385+
default_oss_name, url_to_analyze,
386+
correct_mode, correct_fpath)
357387
try:
358388
if not keep_raw_data:
359389
logger.debug(f"Remove temporary files: {_output_dir}")
360-
rmdir(_output_dir)
390+
shutil.rmtree(_output_dir)
361391
except Exception as ex:
362392
logger.debug(f"Error to remove temp files:{ex}")
363393
except Exception as ex:

0 commit comments

Comments
 (0)