Skip to content

Commit a6f16e1

Browse files
authored
Fix the pub download url bug (#147)
* Fix the pub download url bug Signed-off-by: Jiyeong Seok <[email protected]> * Add to get oss info with package url Signed-off-by: Jiyeong Seok <[email protected]> --------- Signed-off-by: Jiyeong Seok <[email protected]>
1 parent 80c3b50 commit a6f16e1

File tree

3 files changed

+140
-33
lines changed

3 files changed

+140
-33
lines changed

requirements.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ python3-wget
1010
beautifulsoup4
1111
jsonmerge
1212
spdx-tools==0.7.0rc0
13-
npm
1413
setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
1514
numpy; python_version < '3.8'
16-
numpy>=1.22.2; python_version >= '3.8'
15+
numpy>=1.22.2; python_version >= '3.8'
16+
npm
17+
requests

src/fosslight_util/_get_downloadable_url.py

Lines changed: 103 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,32 +4,119 @@
44
# SPDX-License-Identifier: Apache-2.0
55
import logging
66
import re
7+
import requests
8+
from npm.bindings import npm_run
9+
from lastversion import latest
710
from bs4 import BeautifulSoup
811
from urllib.request import urlopen
912
import fosslight_util.constant as constant
10-
from npm.bindings import npm_run
1113

1214
logger = logging.getLogger(constant.LOGGER_NAME)
1315

1416

17+
def extract_name_version_from_link(link):
    """Derive the OSS name, version and package type from a package-page URL.

    Recognized link formats:
        pypi  : https://pypi.org/project/(oss_name)/(version)
        pypi2 : https://files.pythonhosted.org/packages/source/(alphabet)/(oss_name)/(oss_name)-(version).tar.gz
        maven : https://mvnrepository.com/artifact/(group)/(artifact)/(version)
        npm   : https://www.npmjs.com/package/(package)/v/(version)
        npm2  : https://www.npmjs.com/package/@(group)/(package)/v/(version)
        pub   : https://pub.dev/packages/(package)/versions/(version)
        pods  : https://cocoapods.org/pods/(package)

    A link starting with "www." is first rewritten to start with
    "https://www.". When a pypi, maven, npm, npm2 or pub link carries no
    version, get_latest_package_version() is asked for one and the link is
    replaced by the versioned link it returns.

    Args:
        link: URL of the package page.

    Returns:
        A tuple (oss_name, oss_version, link, pkg_type). pkg_type is the key of
        the pattern that matched, or "" when no pattern matched; in that case
        oss_name and oss_version are "" as well.
    """
    pkg_pattern = {
        "pypi": r'https?:\/\/pypi\.org\/project\/([^\/]+)[\/]?([^\/]*)',
        "pypi2": r'https?:\/\/files\.pythonhosted\.org\/packages\/source\/[\w]\/([^\/]+)\/[\S]+-([^\-]+)\.tar\.gz',
        "maven": r'https?:\/\/mvnrepository\.com\/artifact\/([^\/]+)\/([^\/]+)\/?([^\/]*)',
        "npm": r'https?:\/\/www\.npmjs\.com\/package\/([^\/\@]+)(?:\/v\/)?([^\/]*)',
        "npm2": r'https?:\/\/www\.npmjs\.com\/package\/(\@[^\/]+\/[^\/]+)(?:\/v\/)?([^\/]*)',
        "pub": r'https?:\/\/pub\.dev\/packages\/([^\/]+)(?:\/versions\/)?([^\/]*)',
        "pods": r'https?:\/\/cocoapods\.org\/pods\/([^\/]+)'
    }
    oss_name = ""
    oss_version = ""
    # Stays "" unless a pattern matches, so an unrecognized link is never
    # reported as belonging to the last pattern that was tried.
    pkg_type = ""

    if link.startswith("www."):
        link = link.replace("www.", "https://www.", 1)

    for key, value in pkg_pattern.items():
        match = re.match(value, link)
        if not match:
            continue

        pkg_type = key
        origin_name = match.group(1)
        if key in ("pypi", "pypi2"):
            # Collapse runs of '-', '_' and '.' into a single '-' and lowercase.
            oss_name = re.sub(r"[-_.]+", "-", f"pypi:{origin_name}").lower()
            oss_version = match.group(2)
        elif key == "maven":
            oss_name = f"{origin_name}:{match.group(2)}"
            # The latest-version lookup is given the "group:artifact" form.
            origin_name = oss_name
            oss_version = match.group(3)
        elif key in ("npm", "npm2"):
            oss_name = f"npm:{origin_name}"
            oss_version = match.group(2)
        elif key == "pub":
            oss_name = f"pub:{origin_name}"
            oss_version = match.group(2)
        elif key == "pods":
            # The CocoaPods pattern captures no version.
            oss_name = f"cocoapods:{origin_name}"

        if oss_name and (not oss_version):
            if key in ["pypi", "maven", "npm", "npm2", "pub"]:
                oss_version, link = get_latest_package_version(link, key, origin_name)
                logger.debug(f'Try to download with the latest version:{link}')
        break

    return oss_name, oss_version, link, pkg_type
70+
71+
72+
def get_latest_package_version(link, pkg_type, oss_name, timeout=10):
    """Look up the latest version of a package and build a versioned link.

    The lookup is best effort: any exception is logged at debug level and the
    values collected so far are returned.

    Args:
        link: Original package link; returned unchanged when no version is found.
        pkg_type: One of 'npm', 'npm2', 'pypi', 'maven' or 'pub'. Any other
            value performs no lookup.
        oss_name: Package name as the registry expects it. For maven this is
            "group:artifact".
        timeout: Seconds to wait for the HTTP registry requests (maven, pub),
            so an unresponsive server cannot block the caller forever.

    Returns:
        A tuple (find_version, link_with_version). find_version is '' and
        link_with_version equals link when no version could be determined.
    """
    find_version = ''
    link_with_version = link

    try:
        if pkg_type in ['npm', 'npm2']:
            # The result of npm_run is unpacked as (stderr, stdout).
            _stderr, stdout = npm_run('view', oss_name, 'version')
            if stdout:
                find_version = stdout.strip()
                link_with_version = f'https://www.npmjs.com/package/{oss_name}/v/{find_version}'
        elif pkg_type == 'pypi':
            latest_version = latest(oss_name, at='pip', output_format='version', pre_ok=True)
            # Guard before str(): an empty result must not become the version "None".
            if latest_version:
                find_version = str(latest_version)
                link_with_version = f'https://pypi.org/project/{oss_name}/{find_version}'
        elif pkg_type == 'maven':
            maven_response = requests.get(
                f'https://api.deps.dev/v3alpha/systems/maven/packages/{oss_name}',
                timeout=timeout)
            if maven_response.status_code == 200:
                versions = maven_response.json().get('versions') or []
                # NOTE(review): takes the last listed entry as the latest version;
                # confirm the ordering guarantee against the deps.dev API docs.
                version = versions[-1].get('versionKey').get('version') if versions else None
                if version:
                    find_version = version
                    oss_name = oss_name.replace(':', '/')
                    link_with_version = f'https://mvnrepository.com/artifact/{oss_name}/{find_version}'
        elif pkg_type == 'pub':
            pub_response = requests.get(f'https://pub.dev/api/packages/{oss_name}', timeout=timeout)
            if pub_response.status_code == 200:
                version = pub_response.json().get('latest').get('version')
                if version:
                    find_version = version
                    link_with_version = f'https://pub.dev/packages/{oss_name}/versions/{find_version}'
    except Exception as e:
        logger.debug(f'Fail to get latest package version({link}:{e})')
    return find_version, link_with_version
99+
100+
15101
def get_downloadable_url(link):
16102

17103
ret = False
18-
new_link = ''
104+
result_link = link
19105

20-
link = link.replace('http://', '')
21-
link = link.replace('https://', '')
106+
oss_name, oss_version, new_link, pkg_type = extract_name_version_from_link(link)
107+
new_link = new_link.replace('http://', '')
108+
new_link = new_link.replace('https://', '')
22109

23-
if link.startswith('pypi.org/'):
24-
ret, new_link = get_download_location_for_pypi(link)
25-
elif link.startswith('mvnrepository.com/artifact/') or link.startswith('repo1.maven.org/'):
26-
ret, new_link = get_download_location_for_maven(link)
27-
elif link.startswith('www.npmjs.com/') or link.startswith('registry.npmjs.org/'):
28-
ret, new_link = get_download_location_for_npm(link)
29-
elif link.startswith('pub.dev/'):
30-
ret, new_link = get_download_location_for_pub(link)
110+
if pkg_type == "pypi":
111+
ret, result_link = get_download_location_for_pypi(new_link)
112+
elif pkg_type == "maven" or new_link.startswith('repo1.maven.org/'):
113+
ret, result_link = get_download_location_for_maven(new_link)
114+
elif (pkg_type in ["npm", "npm2"]) or new_link.startswith('registry.npmjs.org/'):
115+
ret, result_link = get_download_location_for_npm(new_link)
116+
elif pkg_type == "pub":
117+
ret, result_link = get_download_location_for_pub(new_link)
31118

32-
return ret, new_link
119+
return ret, result_link, oss_name, oss_version
33120

34121

35122
def get_download_location_for_pypi(link):
@@ -134,16 +221,9 @@ def get_download_location_for_npm(link):
134221
oss_name_npm = dn_loc_split[idx]
135222
tar_name = oss_name_npm
136223
oss_version = dn_loc_split[idx+2]
137-
except Exception:
138-
pass
139224

140-
try:
141-
if not oss_version:
142-
stderr, stdout = npm_run('view', oss_name_npm, 'version')
143-
if stdout:
144-
oss_version = stdout.strip()
145-
tar_name = f"{tar_name}-{oss_version}"
146-
new_link = 'https://registry.npmjs.org/' + oss_name_npm + '/-/' + tar_name + '.tgz'
225+
tar_name = f'{tar_name}-{oss_version}'
226+
new_link = f'https://registry.npmjs.org/{oss_name_npm}/-/{tar_name}.tgz'
147227
ret = True
148228
except Exception as error:
149229
ret = False
@@ -159,7 +239,7 @@ def get_download_location_for_pub(link):
159239
# download url format : https://pub.dev/packages/(oss_name)/versions/(oss_version).tar.gz
160240
try:
161241
if link.startswith('pub.dev/packages'):
162-
new_link = 'https://{link}.tar.gz'
242+
new_link = f'https://{link}.tar.gz'
163243
ret = True
164244

165245
except Exception as error:

src/fosslight_util/download.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import threading
2525
import platform
2626
import subprocess
27+
import re
2728

2829
logger = logging.getLogger(constant.LOGGER_NAME)
2930
compression_extension = {".tar.bz2", ".tar.gz", ".tar.xz", ".tgz", ".tar", ".zip", ".jar", ".bz2"}
@@ -115,6 +116,8 @@ def cli_download_and_extract(link, target_dir, log_dir, checkout_to="", compress
115116

116117
success = True
117118
msg = ""
119+
oss_name = ""
120+
oss_version = ""
118121
log_file_name = "fosslight_download_" + \
119122
datetime.now().strftime('%Y%m%d_%H-%M-%S')+".txt"
120123
logger, log_item = init_log(os.path.join(log_dir, log_file_name))
@@ -135,22 +138,29 @@ def cli_download_and_extract(link, target_dir, log_dir, checkout_to="", compress
135138
is_rubygems = src_info.get("rubygems", False)
136139

137140
# General download (git clone, wget)
138-
if (not is_rubygems) and (not download_git_clone(link, target_dir, checkout_to, tag, branch)):
141+
success_git, msg, oss_name = download_git_clone(link, target_dir, checkout_to, tag, branch)
142+
if (not is_rubygems) and (not success_git):
139143
if os.path.isfile(target_dir):
140144
shutil.rmtree(target_dir)
141145

142-
success, downloaded_file = download_wget(link, target_dir, compressed_only)
146+
success, downloaded_file, msg_wget, oss_name, oss_version = download_wget(link, target_dir, compressed_only)
143147
if success:
144148
success = extract_compressed_file(downloaded_file, target_dir, True)
145149
# Download from rubygems.org
146150
elif is_rubygems and shutil.which("gem"):
147151
success = gem_download(link, target_dir, checkout_to)
152+
if msg:
153+
msg = f'git fail: {msg}'
154+
if msg_wget:
155+
msg = f'{msg}, wget fail: {msg_wget}'
156+
else:
157+
msg = f'{msg}, wget success'
148158
except Exception as error:
149159
success = False
150160
msg = str(error)
151161

152-
logger.info(f"\n* FOSSLight Downloader - Result: {success}\n {msg}")
153-
return success, msg
162+
logger.info(f"\n* FOSSLight Downloader - Result: {success} ({msg})")
163+
return success, msg, oss_name, oss_version
154164

155165

156166
def get_ref_to_checkout(checkout_to, ref_list):
@@ -184,8 +194,19 @@ def decide_checkout(checkout_to="", tag="", branch=""):
184194
return ref_to_checkout
185195

186196

197+
def get_github_ossname(link):
    """Build an "(owner)-(repo)" name from a GitHub repository URL.

    A trailing ".git" on the repository segment is dropped, and anything after
    the repository segment (sub-path, query, fragment) is ignored. Dots inside
    the repository name are kept, so ".../socketio/socket.io" yields
    "socketio-socket.io".

    Args:
        link: URL expected to start with http(s)://github.com/(owner)/(repo).

    Returns:
        "(owner)-(repo)", or "" when the link is not such a GitHub URL.
    """
    oss_name = ""
    # The repo group is lazy so an optional ".git" suffix is split off instead
    # of being swallowed; the segment must end at '/', '?', '#' or end of string.
    p = re.compile(r'https?://github\.com/([^/]+)/([^/?#]+?)(?:\.git)?(?:[/?#]|$)')
    match = p.match(link)
    if match:
        oss_name = f"{match.group(1)}-{match.group(2)}"
    return oss_name
204+
205+
187206
def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch=""):
188207
ref_to_checkout = decide_checkout(checkout_to, tag, branch)
208+
msg = ""
209+
oss_name = get_github_ossname(git_url)
189210

190211
if platform.system() != "Windows":
191212
signal.signal(signal.SIGALRM, alarm_handler)
@@ -204,7 +225,8 @@ def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch=""):
204225
del alarm
205226
except Exception as error:
206227
logger.warning(f"git clone - failed: {error}")
207-
return False
228+
msg = str(error)
229+
return False, msg, oss_name
208230
try:
209231
if ref_to_checkout != "":
210232
ref_list = [x for x in repo.references]
@@ -213,11 +235,14 @@ def download_git_clone(git_url, target_dir, checkout_to="", tag="", branch=""):
213235
repo.checkout(ref_to_checkout)
214236
except Exception as error:
215237
logger.warning(f"git checkout to {ref_to_checkout} - failed: {error}")
216-
return True
238+
return True, msg, oss_name
217239

218240

219241
def download_wget(link, target_dir, compressed_only):
220242
success = False
243+
msg = ""
244+
oss_name = ""
245+
oss_version = ""
221246
downloaded_file = ""
222247
if platform.system() != "Windows":
223248
signal.signal(signal.SIGALRM, alarm_handler)
@@ -228,7 +253,7 @@ def download_wget(link, target_dir, compressed_only):
228253
try:
229254
Path(target_dir).mkdir(parents=True, exist_ok=True)
230255

231-
ret, new_link = get_downloadable_url(link)
256+
ret, new_link, oss_name, oss_version = get_downloadable_url(link)
232257
if ret and new_link:
233258
link = new_link
234259

@@ -255,9 +280,10 @@ def download_wget(link, target_dir, compressed_only):
255280
logger.debug(f"wget - downloaded: {downloaded_file}")
256281
except Exception as error:
257282
success = False
283+
msg = str(error)
258284
logger.warning(f"wget - failed: {error}")
259285

260-
return success, downloaded_file
286+
return success, downloaded_file, msg, oss_name, oss_version
261287

262288

263289
def extract_compressed_dir(src_dir, target_dir, remove_after_extract=True):

0 commit comments

Comments
 (0)