Skip to content

Commit d653409

Browse files
committed
Support excluding paths
Signed-off-by: SeongjunJo <[email protected]>
1 parent 50a5067 commit d653409

File tree

8 files changed

+108
-29
lines changed

8 files changed

+108
-29
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ pyparsing
22
scancode-toolkit==32.0.2
33
scanoss
44
XlsxWriter
5-
fosslight_util>=1.4.40
5+
fosslight_util>=1.4.43
66
PyYAML
77
wheel>=0.38.1
88
intbitset

src/fosslight_source/_help.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
-h\t\t\t Print help message
1919
-v\t\t\t Print FOSSLight Source Scanner version
2020
-m\t\t\t Print additional information for scan result on separate sheets
21+
-e <path>\t\t Path to exclude from analysis (file and directory)
2122
-o <output_path>\t Output path (Path or file name)
2223
-f <format>\t\t Output file format (excel, csv, opossum, yaml)
2324
Options only for FOSSLight Source Scanner

src/fosslight_source/_parsing_scanoss_file.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# Copyright (c) 2020 LG Electronics Inc.
44
# SPDX-License-Identifier: Apache-2.0
55

6+
import os
67
import logging
78
import fosslight_util.constant as constant
89
from ._scan_item import ScanItem
@@ -34,10 +35,14 @@ def parsing_extraInfo(scanned_result):
3435
return scanoss_extra_info
3536

3637

37-
def parsing_scanResult(scanoss_report):
38+
def parsing_scanResult(scanoss_report, path_to_scan="", path_to_exclude=[]):
3839
scanoss_file_item = []
40+
abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path)) for path in path_to_exclude]
3941

4042
for file_path, findings in scanoss_report.items():
43+
abs_file_path = os.path.abspath(os.path.join(path_to_scan, file_path))
44+
if any(os.path.commonpath([abs_file_path, exclude_path]) == exclude_path for exclude_path in abs_path_to_exclude):
45+
continue
4146
result_item = ScanItem(file_path)
4247
if 'id' in findings[0]:
4348
if "none" == findings[0]['id']:

src/fosslight_source/cli.py

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def main():
4444
_result_log = {}
4545

4646
path_to_scan = os.getcwd()
47+
path_to_exclude = []
4748
write_json_file = False
4849
output_file_name = ""
4950
print_matched_text = False
@@ -65,6 +66,7 @@ def main():
6566
parser.add_argument('-s', '--scanner', nargs=1, type=str, required=False, default='all')
6667
parser.add_argument('-t', '--timeout', type=int, required=False, default=120)
6768
parser.add_argument('-c', '--cores', type=int, required=False, default=-1)
69+
parser.add_argument('-e', '--exclude', nargs='*', required=False, default=[])
6870
parser.add_argument('--no_correction', action='store_true', required=False)
6971
parser.add_argument('--correct_fpath', nargs=1, type=str, required=False)
7072

@@ -78,6 +80,8 @@ def main():
7880
path_to_scan = os.getcwd()
7981
else:
8082
path_to_scan = ''.join(args.path)
83+
if args.exclude:
84+
path_to_exclude = args.exclude
8185
if args.json:
8286
write_json_file = True
8387
output_file_name = ''.join(args.output)
@@ -103,7 +107,8 @@ def main():
103107
if os.path.isdir(path_to_scan):
104108
result = []
105109
result = run_scanners(path_to_scan, output_file_name, write_json_file, core, True,
106-
print_matched_text, format, time_out, correct_mode, correct_filepath, selected_scanner)
110+
print_matched_text, format, time_out, correct_mode, correct_filepath,
111+
selected_scanner, path_to_exclude)
107112
_result_log["Scan Result"] = result[1]
108113

109114
try:
@@ -115,9 +120,26 @@ def main():
115120
sys.exit(1)
116121

117122

123+
def count_files(path_to_scan, path_to_exclude):
    """
    Count the files under a directory and how many of them are excluded.

    :param path_to_scan: directory that is walked recursively.
    :param path_to_exclude: iterable of file or directory paths to treat as
        excluded. Each entry is joined onto ``path_to_scan`` before being made
        absolute, so an absolute entry is used as is. ``None`` is treated as
        "nothing excluded".
    :return: tuple ``(total_files, excluded_files)``. ``total_files`` counts
        every file found by the walk, including the excluded ones.
    """
    total_files = 0
    excluded_files = 0
    abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path))
                           for path in path_to_exclude or ()]

    def _is_excluded(abs_file_path):
        # A file is excluded when an exclude entry is the file itself or one
        # of its parent directories.
        for exclude_path in abs_path_to_exclude:
            try:
                if os.path.commonpath([abs_file_path, exclude_path]) == exclude_path:
                    return True
            except ValueError:
                # commonpath() raises when the two paths are on different
                # drives; such an entry cannot contain this file.
                continue
        return False

    for root, _, files in os.walk(path_to_scan):
        for file in files:
            total_files += 1
            if _is_excluded(os.path.abspath(os.path.join(root, file))):
                excluded_files += 1

    return total_files, excluded_files
138+
139+
118140
def create_report_file(_start_time, merged_result, license_list, scanoss_result, selected_scanner, need_license=False,
119141
output_path="", output_file="", output_extension="", correct_mode=True, correct_filepath="",
120-
path_to_scan=""):
142+
path_to_scan="", path_to_exclude=[]):
121143
"""
122144
Create report files for given scanned result.
123145
@@ -146,9 +168,10 @@ def create_report_file(_start_time, merged_result, license_list, scanoss_result,
146168

147169
cover = CoverItem(tool_name=_PKG_NAME,
148170
start_time=_start_time,
149-
input_path=path_to_scan)
150-
files_count = sum([len(files) for r, d, files in os.walk(path_to_scan)])
151-
cover.comment = f"Total number of files: {files_count} "
171+
input_path=path_to_scan,
172+
exclude_path=path_to_exclude)
173+
files_count, removed_files_count = count_files(path_to_scan, path_to_exclude)
174+
cover.comment = f"Total number of files / removed files: {files_count} / {removed_files_count}"
152175
if len(merged_result) == 0:
153176
if files_count < 1:
154177
cover.comment += "(No file detected.)"
@@ -224,7 +247,7 @@ def merge_results(scancode_result=[], scanoss_result=[], spdx_downloads={}):
224247

225248
def run_scanners(path_to_scan, output_file_name="", write_json_file=False, num_cores=-1, called_by_cli=True,
226249
print_matched_text=False, format="", time_out=120, correct_mode=True, correct_filepath="",
227-
selected_scanner='all'):
250+
selected_scanner='all', path_to_exclude=[]):
228251
"""
229252
Run Scancode and scanoss.py for the given path.
230253
@@ -252,7 +275,7 @@ def run_scanners(path_to_scan, output_file_name="", write_json_file=False, num_c
252275

253276
success, msg, output_path, output_file, output_extension = check_output_format(output_file_name, format)
254277
logger, result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{start_time}.txt"),
255-
True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan)
278+
True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan, path_to_exclude)
256279
if output_extension != '.xlsx' and output_extension and print_matched_text:
257280
logger.warning("-m option is only available for excel.")
258281
print_matched_text = False
@@ -261,14 +284,17 @@ def run_scanners(path_to_scan, output_file_name="", write_json_file=False, num_c
261284
success, result_log[RESULT_KEY], scancode_result, license_list = run_scan(path_to_scan, output_file_name,
262285
write_json_file, num_cores, True,
263286
print_matched_text, format, called_by_cli,
264-
time_out, correct_mode, correct_filepath)
287+
time_out, correct_mode, correct_filepath,
288+
path_to_exclude)
265289
if selected_scanner == 'scanoss' or selected_scanner == 'all' or selected_scanner == '':
266-
scanoss_result = run_scanoss_py(path_to_scan, output_file_name, format, True, write_json_file, num_cores)
290+
scanoss_result = run_scanoss_py(path_to_scan, output_file_name, format, True,
291+
write_json_file, num_cores, path_to_exclude)
267292
if selected_scanner in SCANNER_TYPE:
268-
spdx_downloads = get_spdx_downloads(path_to_scan)
293+
spdx_downloads = get_spdx_downloads(path_to_scan, path_to_exclude)
269294
merged_result = merge_results(scancode_result, scanoss_result, spdx_downloads)
270-
create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner, print_matched_text,
271-
output_path, output_file, output_extension, correct_mode, correct_filepath, path_to_scan)
295+
create_report_file(start_time, merged_result, license_list, scanoss_result, selected_scanner,
296+
print_matched_text, output_path, output_file, output_extension, correct_mode,
297+
correct_filepath, path_to_scan, path_to_exclude)
272298
else:
273299
print_help_msg_source_scanner()
274300
result_log[RESULT_KEY] = "Unsupported scanner"

src/fosslight_source/run_scancode.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626
def run_scan(path_to_scan, output_file_name="",
2727
_write_json_file=False, num_cores=-1, return_results=False, need_license=False, format="",
28-
called_by_cli=False, time_out=120, correct_mode=True, correct_filepath=""):
28+
called_by_cli=False, time_out=120, correct_mode=True, correct_filepath="", path_to_exclude=[]):
2929
if not called_by_cli:
3030
global logger
3131

@@ -62,20 +62,35 @@ def run_scan(path_to_scan, output_file_name="",
6262

6363
if not called_by_cli:
6464
logger, _result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{_start_time}.txt"),
65-
True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan)
65+
True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan, path_to_exclude)
6666

6767
num_cores = multiprocessing.cpu_count() - 1 if num_cores < 0 else num_cores
6868

6969
if os.path.isdir(path_to_scan):
7070
try:
7171
time_out = float(time_out)
72+
pretty_params = {}
73+
pretty_params["path_to_scan"] = path_to_scan
74+
pretty_params["path_to_exclude"] = path_to_exclude
75+
pretty_params["output_file"] = output_file_name
76+
total_files_to_excluded = []
77+
if path_to_exclude:
78+
for path in path_to_exclude:
79+
path = os.path.join(path_to_scan, path)
80+
if os.path.isdir(path):
81+
for root, _, files in os.walk(path):
82+
total_files_to_excluded.extend([os.path.normpath(os.path.join(root, file)).replace("\\", "/")
83+
for file in files])
84+
elif os.path.isfile(path):
85+
total_files_to_excluded.append(os.path.normpath(path).replace("\\", "/"))
86+
7287
rc, results = cli.run_scan(path_to_scan, max_depth=100,
7388
strip_root=True, license=True,
7489
copyright=True, return_results=True,
75-
processes=num_cores,
76-
output_json_pp=output_json_file,
77-
only_findings=True, license_text=True,
78-
url=True, timeout=time_out)
90+
processes=num_cores, pretty_params=pretty_params,
91+
output_json_pp=output_json_file, only_findings=True,
92+
license_text=True, url=True, timeout=time_out,
93+
include=(), ignore=tuple(total_files_to_excluded))
7994

8095
if not rc:
8196
msg = "Source code analysis failed."

src/fosslight_source/run_scanoss.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ def get_scanoss_extra_info(scanned_result):
2929
return parsing_extraInfo(scanned_result)
3030

3131

32-
def run_scanoss_py(path_to_scan, output_file_name="", format="", called_by_cli=False, write_json_file=False, num_threads=-1):
32+
def run_scanoss_py(path_to_scan, output_file_name="", format="", called_by_cli=False,
33+
write_json_file=False, num_threads=-1, path_to_exclude=[]):
3334
"""
3435
Run scanoss.py for the given path.
3536
@@ -46,7 +47,7 @@ def run_scanoss_py(path_to_scan, output_file_name="", format="", called_by_cli=F
4647
global logger
4748
_start_time = datetime.now().strftime('%y%m%d_%H%M')
4849
logger, _result_log = init_log(os.path.join(output_path, f"fosslight_log_src_{_start_time}.txt"),
49-
True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan)
50+
True, logging.INFO, logging.DEBUG, _PKG_NAME, path_to_scan, path_to_exclude)
5051

5152
scanoss_file_list = []
5253
try:
@@ -73,9 +74,30 @@ def run_scanoss_py(path_to_scan, output_file_name="", format="", called_by_cli=F
7374
try:
7475
os.system(scan_command)
7576
if os.path.isfile(output_json_file):
77+
total_files_to_excluded = []
78+
if path_to_exclude:
79+
for path in path_to_exclude:
80+
path = os.path.join(path_to_scan, path)
81+
if os.path.isdir(path):
82+
for root, _, files in os.walk(path):
83+
root = root[len(path_to_scan) + 1:]
84+
total_files_to_excluded.extend([os.path.normpath(os.path.join(root, file)).replace('\\', '/')
85+
for file in files])
86+
elif os.path.isfile(path):
87+
path = path[len(path_to_scan) + 1:]
88+
total_files_to_excluded.append(os.path.normpath(path).replace('\\', '/'))
89+
90+
with open(output_json_file, "r") as st_json:
91+
st_python = json.load(st_json)
92+
for key_to_exclude in total_files_to_excluded:
93+
if key_to_exclude in st_python:
94+
del st_python[key_to_exclude]
95+
with open(output_json_file, 'w') as st_json:
96+
json.dump(st_python, st_json, indent=4)
7697
with open(output_json_file, "r") as st_json:
7798
st_python = json.load(st_json)
78-
scanoss_file_list = parsing_scanResult(st_python)
99+
scanoss_file_list = parsing_scanResult(st_python, path_to_scan, path_to_exclude)
100+
79101
except Exception as error:
80102
logger.debug(f"SCANOSS Parsing {path_to_scan}: {error}")
81103

src/fosslight_source/run_spdx_extractor.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,25 @@
1212
logger = logging.getLogger(constant.LOGGER_NAME)
1313

1414

15-
def get_file_list(path_to_scan):
15+
def get_file_list(path_to_scan, path_to_exclude=None):
    """
    Collect the files under ``path_to_scan``, skipping excluded paths.

    :param path_to_scan: directory that is walked recursively.
    :param path_to_exclude: optional list of file or directory paths to skip.
        Each entry is joined onto ``path_to_scan`` before being made absolute,
        so an absolute entry is used as is. ``None`` or an empty list excludes
        nothing.
    :return: list of file paths, each built as ``os.path.join(root, file)``
        from the walk, in walk order.
    """
    abs_path_to_exclude = [os.path.abspath(os.path.join(path_to_scan, path))
                           for path in path_to_exclude or ()]

    def _is_excluded(candidate):
        # A path is excluded when an exclude entry is the path itself or one
        # of its parent directories.
        abs_candidate = os.path.abspath(candidate)
        for exclude_path in abs_path_to_exclude:
            try:
                if os.path.commonpath([abs_candidate, exclude_path]) == exclude_path:
                    return True
            except ValueError:
                # commonpath() raises when the two paths are on different
                # drives; such an entry cannot contain this path.
                continue
        return False

    file_list = []
    for root, dirs, files in os.walk(path_to_scan):
        if abs_path_to_exclude:
            # Prune excluded directories in place so os.walk never descends
            # into them; every file below would be skipped anyway.
            dirs[:] = [d for d in dirs if not _is_excluded(os.path.join(root, d))]
        for file in files:
            file_path = os.path.join(root, file)
            if not _is_excluded(file_path):
                file_list.append(file_path)
    return file_list
2127

2228

23-
def get_spdx_downloads(path_to_scan):
29+
def get_spdx_downloads(path_to_scan, path_to_exclude=[]):
2430
download_dict = {}
2531
find_word = re.compile(rb"SPDX-PackageDownloadLocation\s*:\s*(\S+)", re.IGNORECASE)
2632

27-
file_list = get_file_list(path_to_scan)
33+
file_list = get_file_list(path_to_scan, path_to_exclude)
2834

2935
for file in file_list:
3036
try:

tox.ini

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ deps =
2727
commands =
2828
rm -rf test_scan
2929
fosslight_source -p tests/test_files -j -m -o test_scan
30+
fosslight_source -p tests -e test_files/test cli_test.py -j -m -o test_scan2
3031

3132
[testenv:release]
3233
deps =
@@ -38,8 +39,11 @@ commands =
3839
fosslight_source -p tests/test_files -o test_scan/scan_result.csv
3940
cat test_scan/scan_result.csv
4041

41-
fosslight_source -p tests/test_files -m -j -o test_scan2/
42-
ls test_scan2/
42+
fosslight_source -p tests -e test_files/test cli_test.py -j -m -o test_scan2/scan_exclude_result.csv
43+
cat test_scan2/scan_exclude_result.csv
44+
45+
fosslight_source -p tests/test_files -m -j -o test_scan3/
46+
ls test_scan3/
4347

4448
python tests/cli_test.py
4549
pytest -v --flake8

0 commit comments

Comments
 (0)