Skip to content

Commit 51420c2

Browse files
authored
Merge pull request #118 from fosslight/ossitem
Refactoring OSS item
2 parents a17f6bf + 2acef36 commit 51420c2

File tree

4 files changed

+130
-224
lines changed

4 files changed

+130
-224
lines changed

requirements.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ openpyxl
44
progress
55
pyyaml
66
beautifulsoup4
7-
fosslight_util~=1.4.48
8-
fosslight_source~=1.7.8
9-
fosslight_dependency~=3.15.1
10-
fosslight_binary~=4.1.30
7+
fosslight_util>=2.0.0
8+
fosslight_source>=2.0.0
9+
fosslight_dependency>=4.0.0
10+
fosslight_binary>=5.0.0
1111
fosslight_prechecker==3.0.27

src/fosslight_scanner/_run_compare.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,8 @@
1414
from bs4 import BeautifulSoup
1515
import fosslight_util.constant as constant
1616
from fosslight_util.compare_yaml import compare_yaml
17-
from fosslight_util.convert_excel_to_yaml import convert_excel_to_yaml
17+
from fosslight_util.read_excel import read_oss_report
18+
from fosslight_util.parsing_yaml import parsing_yml
1819

1920
logger = logging.getLogger(constant.LOGGER_NAME)
2021
ADD = "add"
@@ -255,10 +256,18 @@ def run_compare(before_f, after_f, output_path, output_file, file_ext, _start_ti
255256

256257
result_file = get_comparison_result_filename(output_path, output_file, file_ext, _start_time)
257258

258-
if before_ext == XLSX_EXT:
259-
convert_excel_to_yaml(before_f, before_yaml)
260-
convert_excel_to_yaml(after_f, after_yaml)
261-
compared_result = compare_yaml(before_yaml, after_yaml)
259+
before_basepath = os.path.dirname(before_f)
260+
after_basepath = os.path.dirname(after_f)
261+
if XLSX_EXT == before_ext:
262+
before_fileitems = read_oss_report(before_f, "", before_basepath)
263+
elif YAML_EXT == before_ext:
264+
before_fileitems, _, _ = parsing_yml(before_yaml, before_basepath)
265+
if XLSX_EXT == after_ext:
266+
# The second positional argument of read_oss_report is the sheet name; pass ""
# there so after_basepath is received as the base path, not as a sheet name.
after_fileitems = read_oss_report(after_f, "", after_basepath)
267+
elif YAML_EXT == after_ext:
268+
after_fileitems, _, _ = parsing_yml(after_yaml, after_basepath)
269+
270+
compared_result = compare_yaml(before_fileitems, after_fileitems)
262271
if compared_result != '':
263272
count_compared_result(compared_result)
264273
ret, result_file = write_compared_result(result_file, compared_result, file_ext, before_yaml, after_yaml)

src/fosslight_scanner/common.py

Lines changed: 72 additions & 160 deletions
Original file line numberDiff line numberDiff line change
@@ -7,18 +7,12 @@
77
import sys
88
import logging
99
import shutil
10-
import pandas as pd
11-
import yaml
12-
13-
import fosslight_util.constant as constant
14-
from fosslight_util.parsing_yaml import parsing_yml
15-
from fosslight_util.write_yaml import create_yaml_with_ossitem
10+
import copy
11+
from fosslight_util.constant import LOGGER_NAME, FOSSLIGHT_SOURCE, FOSSLIGHT_BINARY, FOSSLIGHT_DEPENDENCY
1612
from fosslight_util.write_scancodejson import write_scancodejson
17-
from fosslight_util.read_excel import read_oss_report
18-
from fosslight_util.output_format import write_output_file
19-
from fosslight_util.oss_item import OssItem
13+
from fosslight_util.oss_item import OssItem, FileItem
2014

21-
logger = logging.getLogger(constant.LOGGER_NAME)
15+
logger = logging.getLogger(LOGGER_NAME)
2216
SRC_SHEET = 'SRC_FL_Source'
2317
BIN_SHEET = 'BIN_FL_Binary'
2418
BIN_EXT_HEADER = {
@@ -93,72 +87,23 @@ def call_analysis_api(path_to_run, str_run_start, return_idx, func, *args, **kwa
9387
return success, result or []
9488

9589

96-
def overwrite_excel(excel_file_path, oss_name, column_name='OSS Name'):
97-
if oss_name:
98-
try:
99-
files = os.listdir(excel_file_path)
100-
for file in files:
101-
if file.endswith(".xlsx"):
102-
file_path = os.path.join(excel_file_path, file)
103-
excel_file = pd.ExcelFile(file_path, engine='openpyxl')
104-
105-
for sheet_name in excel_file.sheet_names:
106-
try:
107-
df = pd.read_excel(file_path, sheet_name=sheet_name, engine='openpyxl')
108-
if column_name in df.columns:
109-
updated = (df[column_name] == '') | (df[column_name].isnull())
110-
df.loc[updated, column_name] = oss_name
111-
df.to_excel(file_path, sheet_name=sheet_name, index=False)
112-
except Exception as ex:
113-
logger.debug(f"overwrite_sheet {sheet_name}:{ex}")
114-
except Exception as ex:
115-
logger.debug(f"overwrite_excel:{ex}")
116-
117-
118-
def merge_yamls(_output_dir, merge_yaml_files, final_report, remove_src_data=False,
119-
default_oss_name='', default_oss_version='', url=''):
120-
success = True
121-
err_msg = ''
122-
123-
oss_total_list = []
124-
yaml_dict = {}
125-
try:
126-
for mf in merge_yaml_files:
127-
if os.path.exists(os.path.join(_output_dir, mf)):
128-
oss_list, _, _ = parsing_yml(os.path.join(_output_dir, mf), _output_dir)
129-
130-
if remove_src_data:
131-
existed_yaml = {}
132-
for oi in oss_list:
133-
oi.name = default_oss_name if oi.name == '' else oi.name
134-
oi.version = default_oss_version if oi.version == '' else oi.version
135-
oi.download_location = url if oi.download_location == '' else oi.download_location
136-
create_yaml_with_ossitem(oi, existed_yaml)
137-
with open(os.path.join(_output_dir, mf), 'w') as f:
138-
yaml.dump(existed_yaml, f, default_flow_style=False, sort_keys=False)
139-
140-
oss_total_list.extend(oss_list)
141-
142-
if oss_total_list != []:
143-
for oti in oss_total_list:
144-
create_yaml_with_ossitem(oti, yaml_dict)
145-
with open(os.path.join(_output_dir, final_report), 'w') as f:
146-
yaml.dump(yaml_dict, f, default_flow_style=False, sort_keys=False)
147-
else:
148-
success = False
149-
err_msg = "Output file is not created as no oss items detected."
150-
except Exception as ex:
151-
err_msg = ex
152-
success = False
153-
154-
return success, err_msg
90+
def update_oss_item(scan_item, oss_name, oss_version, download_loc):
    """Apply default OSS information to the file items held by ``scan_item``.

    For every file item found in ``scan_item.file_items`` (a mapping whose
    values are lists of file items):

    * if the file item has no OSS entries, a new ``OssItem`` built from the
      given name, version and download location is appended;
    * otherwise each existing entry whose name, version and download location
      are all empty is filled in with the given values. Entries that already
      carry any of the three fields are left untouched.

    Args:
        scan_item: Object exposing a ``file_items`` mapping.
        oss_name: OSS name to apply.
        oss_version: OSS version to apply.
        download_loc: Download location to apply.

    Returns:
        The same ``scan_item`` object, modified in place.
    """
    for file_items in scan_item.file_items.values():
        for file_item in file_items:
            if not file_item.oss_items:
                # Nothing was detected for this file: attach the default OSS.
                file_item.oss_items.append(OssItem(oss_name, oss_version, '', download_loc))
                continue
            for oi in file_item.oss_items:
                # Truthiness (rather than == '') so that a None field is
                # treated the same as an empty string.
                if not (oi.name or oi.version or oi.download_location):
                    oi.name = oss_name
                    oi.version = oss_version
                    oi.download_location = download_loc
    return scan_item
155102

156103

157-
def create_scancodejson(final_report, output_extension, ui_mode_report, src_path=""):
104+
def create_scancodejson(all_scan_item_origin, ui_mode_report, src_path=""):
158105
success = True
159106
err_msg = ''
160-
161-
oss_total_list = []
162107
root_dir = ""
163108
root_strip = ""
164109
try:
@@ -170,129 +115,96 @@ def create_scancodejson(final_report, output_extension, ui_mode_report, src_path
170115
root_dir = ""
171116

172117
try:
173-
item_without_oss = OssItem("")
174-
oss_total_list = get_osslist(os.path.dirname(final_report), os.path.basename(final_report),
175-
output_extension, '')
118+
all_scan_item = copy.deepcopy(all_scan_item_origin)
119+
if FOSSLIGHT_DEPENDENCY in all_scan_item.file_items:
120+
del all_scan_item.file_items[FOSSLIGHT_DEPENDENCY]
176121
if src_path:
177-
for root, dirs, files in os.walk(src_path):
122+
fileitems_without_oss = []
123+
for root, _, files in os.walk(src_path):
178124
root = root.replace(root_strip, "")
179125
for file in files:
126+
fi_without_oss = FileItem('')
127+
included = False
180128
item_path = os.path.join(root, file)
181129
item_path = item_path.replace(parent + os.path.sep, '', 1)
182-
included = any(item_path in x.source_name_or_path for x in oss_total_list)
130+
131+
for file_items in all_scan_item.file_items.values():
132+
for file_item in file_items:
133+
if file_item.source_name_or_path:
134+
if file_item.source_name_or_path == item_path:
135+
included = True
136+
break
183137
if not included:
184-
item_without_oss.source_name_or_path = item_path
185-
if len(item_without_oss.source_name_or_path) > 0:
186-
oss_total_list.append(item_without_oss)
138+
fi_without_oss.source_name_or_path = item_path
139+
fileitems_without_oss.append(fi_without_oss)
140+
if len(fileitems_without_oss) > 0:
141+
all_scan_item.file_items[FOSSLIGHT_SOURCE].extend(fileitems_without_oss)
187142
if root_dir:
188-
for oss in oss_total_list:
189-
tmp_path_list = oss.source_name_or_path
190-
oss.source_name_or_path = ""
191-
oss.source_name_or_path = [os.path.join(root_dir, path) for path in tmp_path_list]
192-
143+
for file_items in all_scan_item.file_items.values():
144+
for fi in file_items:
145+
if fi.source_name_or_path:
146+
fi.source_name_or_path = os.path.join(root_dir, fi.source_name_or_path)
193147
write_scancodejson(os.path.dirname(ui_mode_report), os.path.basename(ui_mode_report),
194-
oss_total_list)
148+
all_scan_item)
195149
except Exception as ex:
196150
err_msg = ex
197151
success = False
198152

199153
return success, err_msg
200154

201155

202-
def correct_scanner_result(_output_dir, output_files, output_extension, exist_src, exist_bin):
203-
src_oss_list = []
204-
bin_oss_list = []
156+
def correct_scanner_result(all_scan_item):
    """Reconcile the source-scanner and binary-scanner results in place.

    Steps:

    1. Every source and binary file item has its ``exclude`` flag refreshed
       through ``check_exclude_dir``. This runs for whichever of the two
       result sets is present, not only when both are.
    2. A source file item whose ``source_name_or_path`` equals that of a
       binary file item is a duplicate and is removed from the source list.
       Before removal, if every OSS entry of the source item has a license
       and no OSS entry of the binary item has one, the binary item takes
       over the source item's OSS entries and is commented
       ``'Loaded from SRC OSS info'``.

    Any exception raised while correcting is logged as a warning and the
    scan item is returned as it stands at that point.

    Args:
        all_scan_item: Object exposing a ``file_items`` mapping keyed by
            scanner name, each value being a list of file items.

    Returns:
        The same ``all_scan_item`` object.
    """
    duplicates = False
    file_items = all_scan_item.file_items
    src_fileitems = file_items[FOSSLIGHT_SOURCE] if FOSSLIGHT_SOURCE in file_items else []
    bin_fileitems = file_items[FOSSLIGHT_BINARY] if FOSSLIGHT_BINARY in file_items else []

    try:
        # Refresh the exclude flag once per file item, independent of whether
        # the other scanner produced results.
        for fileitem in (*src_fileitems, *bin_fileitems):
            fileitem.exclude = check_exclude_dir(fileitem.source_name_or_path, fileitem.exclude)

        remove_src_idx_list = []
        for idx_src, src_fileitem in enumerate(src_fileitems):
            dup_flag = False
            for bin_fileitem in bin_fileitems:
                if src_fileitem.source_name_or_path != bin_fileitem.source_name_or_path:
                    continue
                dup_flag = True
                src_oss_items = src_fileitem.oss_items
                bin_oss_items = bin_fileitem.oss_items
                # all() is True for an empty sequence, so require at least one
                # source entry; otherwise the binary entries would be wiped
                # and replaced by nothing.
                src_all_licenses_non_empty = bool(src_oss_items) and all(
                    oss_item.license for oss_item in src_oss_items)
                bin_all_licenses_empty = all(not oss_item.license for oss_item in bin_oss_items)

                if src_all_licenses_non_empty and bin_all_licenses_empty:
                    # Keep the binary side's exclude decision. When the binary
                    # item has no OSS entry to read it from, fall back to the
                    # file item's own flag.
                    # NOTE(review): confirm this fallback is the intended one.
                    exclude = bin_oss_items[0].exclude if bin_oss_items else bin_fileitem.exclude
                    for src_oss_item in src_oss_items:
                        src_oss_item.exclude = exclude
                    bin_fileitem.oss_items = list(src_oss_items)
                    bin_fileitem.comment = 'Loaded from SRC OSS info'
            if dup_flag:
                remove_src_idx_list.append(idx_src)
        if remove_src_idx_list:
            duplicates = True
            # Delete from the end so earlier indexes stay valid.
            for i in sorted(remove_src_idx_list, reverse=True):
                del src_fileitems[i]
    except Exception as ex:
        logger.warning(f"correct the scanner result:{ex}")

    if duplicates:
        logger.info('Success to correct the src/bin scanner result')
    return all_scan_item
286198

287199

288-
def check_exclude_dir(oss_list):
200+
def check_exclude_dir(source_name_or_path, file_item_exclude):
    """Decide whether a file item should be flagged as excluded.

    Args:
        source_name_or_path: Path of the file item, using ``os.path.sep`` as
            the separator. May be empty or ``None``.
        file_item_exclude: Current exclude flag of the file item.

    Returns:
        ``True`` when the item is already excluded, or when one of the path's
        components is exactly ``venv``, ``node_modules``, ``Pods`` or
        ``Carthage``; ``False`` otherwise (including for an empty path).
    """
    if file_item_exclude:
        return True
    if not source_name_or_path:
        # No path to inspect, so there is nothing to exclude on.
        return False
    _exclude_dirs = ("venv", "node_modules", "Pods", "Carthage")
    # Compare whole path components so that e.g. "venv_tools" does not match.
    path_parts = source_name_or_path.split(os.path.sep)
    return any(exclude_dir in path_parts for exclude_dir in _exclude_dirs)

0 commit comments

Comments
 (0)