Skip to content

Commit 17be51c

Browse files
committed
Extract the OSS name from the link
1 parent d1f69ed commit 17be51c

File tree

3 files changed

+82
-5
lines changed

3 files changed

+82
-5
lines changed

src/fosslight_scanner/common.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
import sys
88
import logging
99
from shutil import copy
10+
import re
11+
import pandas as pd
1012
import fosslight_util.constant as constant
1113

1214
logger = logging.getLogger(constant.LOGGER_NAME)
@@ -68,3 +70,75 @@ def call_analysis_api(path_to_run, str_run_start, return_idx, func, *args):
6870
if not result:
6971
result = []
7072
return success, result
73+
74+
75+
# (compiled pattern, name template) rules used by extract_name_from_link.
# Every pattern is anchored at the start of the link (re.match) and each
# captured path segment stops at '/', '?' or '#', so trailing paths, query
# strings and fragments never leak into the extracted name.
_OSS_LINK_RULES = (
    # Github: https://github.com/(owner)/(repo)[.git]
    # The repo segment may contain dots (e.g. "socket.io"); only a trailing
    # ".git" suffix is dropped.
    (re.compile(r'https?://(?:www\.)?github\.com/([^/?#]+)/([^/?#]+?)(?:\.git)?(?:[/?#]|$)'),
     "{0}-{1}"),
    # pypi: https://pypi.org/project/(oss_name)
    (re.compile(r'https?://pypi\.org/project/([^/?#]+)'), "pypi:{0}"),
    # Maven: https://mvnrepository.com/artifact/(group)/(artifact)
    (re.compile(r'https?://mvnrepository\.com/artifact/([^/?#]+)/([^/?#]+)'), "{0}:{1}"),
    # npm (scoped): https://www.npmjs.com/package/@(group)/(package)
    (re.compile(r'https?://(?:www\.)?npmjs\.com/package/(@[^/?#]+/[^/?#]+)'), "npm:{0}"),
    # npm (unscoped): https://www.npmjs.com/package/(package)
    (re.compile(r'https?://(?:www\.)?npmjs\.com/package/([^/@?#][^/?#]*)'), "npm:{0}"),
    # pub: https://pub.dev/packages/(package)
    (re.compile(r'https?://pub\.dev/packages/([^/?#]+)'), "pub:{0}"),
    # Cocoapods: https://cocoapods.org/pods/(package)
    (re.compile(r'https?://cocoapods\.org/pods/([^/?#]+)'), "cocoapods:{0}"),
)


def extract_name_from_link(link):
    """Derive a default OSS name from a repository or package-registry link.

    Recognized links and the name they produce:
        Github    -> "(owner)-(repo)"
        pypi      -> "pypi:(oss_name)"
        Maven     -> "(group):(artifact)"
        npm       -> "npm:(package)" or "npm:@(group)/(package)"
        pub       -> "pub:(package)"
        Cocoapods -> "cocoapods:(package)"

    A link that starts with "www." is treated as if it started with
    "https://www.".

    :param link: URL to analyze.
    :return: the extracted OSS name, or "" when the link is not a string,
             is not recognized, or is incomplete (e.g. a scoped npm link
             without a package part).
    """
    if not isinstance(link, str):
        return ""

    link = link.strip()
    if link.startswith("www."):
        link = f"https://{link}"

    for pattern, template in _OSS_LINK_RULES:
        match = pattern.match(link)
        if match:
            # Captured groups are passed as format arguments, so any braces
            # they contain are never interpreted as format fields.
            return template.format(*match.groups())
    return ""
127+
128+
129+
def overwrite_excel(excel_file_path, oss_name, column_name='OSS Name'):
    """Fill blank cells of a column with a default OSS name in every .xlsx file.

    Each ".xlsx" file directly inside ``excel_file_path`` is loaded with all
    of its sheets. In every sheet that has ``column_name``, cells that are
    empty strings or null are set to ``oss_name``. A workbook is written back
    (all sheets, in one pass) only if at least one cell was changed, so other
    sheets are never dropped and untouched files are left as they are.

    Errors are logged at debug level and never raised; a failure in one file
    does not prevent the remaining files from being processed.

    :param excel_file_path: directory that contains the .xlsx files.
    :param oss_name: value to write into blank cells; nothing is done when
                     it is empty.
    :param column_name: header of the column to fill.
    """
    if not oss_name:
        return

    try:
        file_names = os.listdir(excel_file_path)
    except OSError as ex:
        logger.debug(f"overwrite_excel:{ex}")
        return

    for file_name in file_names:
        if not file_name.endswith(".xlsx"):
            continue
        file_path = os.path.join(excel_file_path, file_name)
        try:
            # sheet_name=None loads every sheet as {sheet name: DataFrame}
            # and closes the file again before it is rewritten below.
            sheets = pd.read_excel(file_path, sheet_name=None, engine='openpyxl')

            changed = False
            for df in sheets.values():
                if column_name not in df.columns:
                    continue
                blank = df[column_name].isnull() | (df[column_name] == '')
                if blank.any():
                    # A fully empty column is read as float (NaN); cast it so
                    # the string assignment does not depend on implicit
                    # dtype upcasting.
                    df[column_name] = df[column_name].astype(object)
                    df.loc[blank, column_name] = oss_name
                    changed = True

            if changed:
                # Write all sheets through a single writer: calling
                # DataFrame.to_excel(path) per sheet would recreate the
                # workbook each time and keep only the last sheet.
                with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
                    for sheet_name, df in sheets.items():
                        df.to_excel(writer, sheet_name=sheet_name, index=False)
        except Exception as ex:
            # Best-effort step: log and continue with the next file.
            logger.debug(f"overwrite_excel:{ex}")

src/fosslight_scanner/fosslight_scanner.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
import fosslight_util.constant as constant
2424
from fosslight_util.output_format import check_output_format
2525
from fosslight_reuse._fosslight_reuse import run_lint as reuse_lint
26-
from .common import copy_file, call_analysis_api
26+
from .common import (copy_file, call_analysis_api,
27+
overwrite_excel, extract_name_from_link)
2728
from fosslight_util.write_excel import merge_excels
2829

2930
OUTPUT_EXCEL_PREFIX = "FOSSLight-Report_"
@@ -90,7 +91,7 @@ def run_dependency(path_to_analyze, output_file_with_path, params=""):
9091
def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
9192
run_src=True, run_bin=True, run_dep=True, run_reuse=True,
9293
remove_src_data=True, result_log={}, output_file="",
93-
output_extension="", num_cores=-1, db_url=""):
94+
output_extension="", num_cores=-1, db_url="", default_oss_name=""):
9495
create_csv = False
9596
final_excel_dir = output_path
9697
success = True
@@ -157,6 +158,8 @@ def run_scanner(src_path, dep_arguments, output_path, keep_raw_data=False,
157158
try:
158159
output_file_without_ext = os.path.join(final_excel_dir, output_file)
159160
final_report = f"{output_file_without_ext}{output_extension}"
161+
if remove_src_data:
162+
overwrite_excel(_output_dir, default_oss_name)
160163
success, output_files = merge_excels(_output_dir, final_report, create_csv)
161164

162165
if success and output_files:
@@ -187,7 +190,7 @@ def download_source(link, out_dir):
187190
try:
188191
success, final_excel_dir, result_log = init(out_dir)
189192
temp_src_dir = os.path.join(
190-
_output_dir, SRC_DIR_FROM_LINK_PREFIX+start_time)
193+
_output_dir, SRC_DIR_FROM_LINK_PREFIX + start_time)
191194

192195
logger.info(f"Link to download: {link}")
193196
success, msg = cli_download_and_extract(
@@ -254,6 +257,7 @@ def run_main(mode, src_path, dep_arguments, output_file_or_dir, file_format, url
254257

255258
if url_to_analyze != "":
256259
remove_downloaded_source = True
260+
default_oss_name = extract_name_from_link(url_to_analyze)
257261
success, src_path = download_source(url_to_analyze, output_path)
258262

259263
if mode == "reuse":
@@ -274,7 +278,7 @@ def run_main(mode, src_path, dep_arguments, output_file_or_dir, file_format, url
274278
run_scanner(src_path, dep_arguments, output_path, keep_raw_data,
275279
run_src, run_bin, run_dep, run_reuse,
276280
remove_downloaded_source, {}, output_file,
277-
output_extension, num_cores, db_url)
281+
output_extension, num_cores, db_url, default_oss_name)
278282

279283
except Exception as ex:
280284
logger.warning(str(ex))

tox.ini

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ skipdist = true
66

77
[testenv]
88
install_command = pip install {opts} {packages}
9-
basepython= python3.6
109
whitelist_externals = cat
1110
cp
1211
rm

0 commit comments

Comments
 (0)