Skip to content

Commit 7442ac3

Browse files
authored
Add function for downloading source
2 parents e592199 + 8cdf595 commit 7442ac3

File tree

7 files changed

+480
-1
lines changed

7 files changed

+480
-1
lines changed

README.md

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ It is a package that supports common utils used by FOSSLight Scanner.
1414
3. It provides a simple function to create a text file.
1515
4. It defines common constant variables.
1616
5. It provides a thread that prints the spinner.
17+
6. It provides a function to download source code.
1718

1819
[or]: http://collab.lge.com/main/x/xDHlFg
1920

@@ -110,6 +111,27 @@ timer.setDaemon(True)
110111
timer.start()
111112
```
112113

114+
### 6. Download the source code (tests/test_download.py)
115+
Given a link, it downloads the source code into the target directory using git clone or wget.
116+
117+
#### How it works
118+
1. Try git clone.
119+
2. If git clone fails, download it with wget and extract the compressed file.
120+
3. After extracting the compressed file, delete the compressed file.
121+
122+
#### Parameters
123+
| Parameter | Argument | Description |
124+
| ------------- | ------------- | ------------- |
125+
| h | None | Print help message. |
126+
| s | String | Link to download. |
127+
| t | String | Path to download and extract. |
128+
| d | String | Path to save a log file. |
129+
130+
#### How to run
131+
```
132+
$ fosslight_download -s "https://github.com/LGE-OSS/example" -t target_dir/
133+
```
134+
113135
## 👏 How to report issue
114136

115137
Please report bugs or suggest improvements by creating an issue in the [fosslight_util repository][cl]. Bug fixes and upgrades will follow as quickly as possible. Ideas for improvement are always welcome.

requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,6 @@ progress
66
PyYAML
77
lastversion
88
coloredlogs
9+
pygit2
10+
python3-wget
11+
beautifulsoup4

setup.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,10 @@
3030
"Programming Language :: Python :: 3.7",
3131
"Programming Language :: Python :: 3.8",
3232
"Programming Language :: Python :: 3.9", ],
33-
install_requires=required
33+
install_requires=required,
34+
entry_points={
35+
"console_scripts": [
36+
"fosslight_download = fosslight_util.download:main",
37+
]
38+
}
3439
)
Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
# Copyright (c) 2020 LG Electronics Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
import logging
6+
import re
7+
from bs4 import BeautifulSoup
8+
from urllib.request import urlopen
9+
import fosslight_util.constant as constant
10+
11+
# Logger obtained under the name defined by constant.LOGGER_NAME (fosslight_util.constant).
logger = logging.getLogger(constant.LOGGER_NAME)
12+
13+
14+
def get_downloadable_url(link):
    """Convert a package-page link into a direct source download URL.

    The leading scheme is stripped and the link is dispatched, based on its
    host, to the matching resolver (PyPI, Maven, npm or pub.dev).

    Args:
        link: Package URL, with or without a leading ``http://`` or
            ``https://``.

    Returns:
        A ``(ret, new_link)`` tuple. ``ret`` is True when a download URL was
        resolved and ``new_link`` holds it. Links on unsupported hosts give
        ``(False, '')``.
    """
    # Strip the scheme only where it leads the link; a blanket replace()
    # would also mangle scheme text embedded later (e.g. in a query string).
    for scheme in ('https://', 'http://'):
        if link.startswith(scheme):
            link = link[len(scheme):]
            break

    if link.startswith('pypi.org/'):
        return get_download_location_for_pypi(link)
    if link.startswith(('mvnrepository.com/artifact/', 'repo1.maven.org/')):
        return get_download_location_for_maven(link)
    if link.startswith(('www.npmjs.com/', 'registry.npmjs.org')):
        return get_download_location_for_npm(link)
    if link.startswith('pub.dev/'):
        return get_download_location_for_pub(link)

    return False, ''
32+
33+
34+
def get_download_location_for_pypi(link):
    """Look up the source-distribution URL on a PyPI project page.

    The page ``https://pypi.org/project/(oss_name)/(oss_version)/#files`` is
    fetched and its file table is scanned for a row whose "File type" cell
    reads "Source".

    Args:
        link: Scheme-less link such as
            ``pypi.org/project/(oss_name)/(oss_version)``. The version
            segment may be absent.

    Returns:
        ``(True, url)`` with the ``href`` of the first source row found, or
        ``(False, '')`` when nothing matches or any step fails. Failures are
        logged as warnings instead of being raised.
    """
    ret = False
    new_link = ''

    try:
        dn_loc_re = re.findall(r'pypi\.org\/project\/?([^\/]*)\/?([^\/]*)', link)
        oss_name, oss_version = dn_loc_re[0]
        if not oss_name:
            raise ValueError('package name is missing')

        # Leave the version segment out when the link has none, rather than
        # producing ".../name//#files".
        pypi_url = 'https://pypi.org/project/' + oss_name + '/'
        if oss_version:
            pypi_url += oss_version + '/'
        pypi_url += '#files'

        # Close the HTTP response deterministically.
        with urlopen(pypi_url) as response:
            content = response.read().decode('utf8')
        bs_obj = BeautifulSoup(content, 'html.parser')

        source_cell = re.compile(
            r'<span class="table__mobile-label">File type</span>[\s]*(Source)[\s]*</td>')

        for row in bs_obj.find('div', {'id': 'files'}).findAll('tr'):
            cells = (str(cell).replace('\n', ' ') for cell in row.findAll('td'))
            if any(source_cell.search(cell) for cell in cells):
                new_link = row.find('a').attrs['href']
                ret = True
                # Stop at the first source row; scanning further rows could
                # only overwrite or invalidate a result that is already good.
                break
    except Exception as error:
        ret = False
        logger.warning('Cannot find the link for pypi (url:%s) %s', link, error)

    return ret, new_link
63+
64+
65+
def get_download_location_for_maven(link):
    """Resolve the source archive URL for a Maven artifact.

    Accepted link shapes (scheme already stripped):

    * ``mvnrepository.com/artifact/(group_id)/(artifact_id)/(oss_version)``,
      which is mapped onto the matching ``repo1.maven.org/maven2`` directory.
    * ``repo1.maven.org/maven2/(group path)/(artifact_id)/(oss_version)``.
      When the link already ends in ``.tar.gz``, ``.jar`` or ``.tar.xz`` it
      is returned as-is with ``https://`` prepended.

    For directory links, the listing is fetched and searched for
    ``(artifact_id)-(oss_version)-sources.jar``. If that exact name is not
    listed, the last anchor whose href ends in ``sources.jar``,
    ``source.jar`` or ``src.jar`` is used.

    Args:
        link: Scheme-less Maven link in one of the shapes above.

    Returns:
        ``(True, url)`` when a source archive was found, otherwise
        ``(False, '')``. Failures are logged as warnings and never raised.
    """
    ret = False
    new_link = ''

    try:
        if link.startswith('mvnrepository.com/artifact/'):
            dn_loc_split = link.replace('mvnrepository.com/', '').split('/')
            group_id = dn_loc_split[1].replace('.', '/')
            dn_loc = 'https://repo1.maven.org/maven2/' + group_id + '/' + dn_loc_split[2] + '/' + dn_loc_split[3]
        elif link.startswith('repo1.maven.org/maven2/'):
            if link.endswith(('.tar.gz', '.jar', '.tar.xz')):
                # Already a direct artifact link; nothing to look up.
                return True, 'https://' + link
            dn_loc = 'https://' + link
        else:
            raise Exception("not valid url for maven")

        # A trailing slash would leave an empty last path segment and break
        # both the file-name derivation and the URL joins below.
        dn_loc = dn_loc.rstrip('/')

        with urlopen(dn_loc) as response:
            html = response.read().decode('utf8')
        bs_obj = BeautifulSoup(html, 'html.parser')

        path_parts = dn_loc.split('/')
        file_name = path_parts[-2] + '-' + path_parts[-1] + '-sources.jar'

        # The loop variable must not reuse the name "link": the warning below
        # still needs the original string if something fails mid-loop.
        for anchor in bs_obj.findAll("a"):
            href = anchor.get('href', '')
            if not href:
                continue
            if anchor.text == file_name:
                new_link = dn_loc + '/' + href
                break
            elif href.endswith(('sources.jar', 'source.jar', 'src.jar')):
                new_link = dn_loc + '/' + href

        if new_link != '':
            ret = True

    except Exception as error:
        ret = False
        logger.warning('Cannot find the link for maven (url:%s) %s', link, error)

    return ret, new_link
111+
112+
113+
def get_download_location_for_npm(link):
    """Build the npm registry tarball URL for a package version.

    Download url format:
    ``https://registry.npmjs.org/(packagename)/-/(packagename)-(version).tgz``.
    For scoped packages the scope is kept in the path but not in the tarball
    file name.

    Args:
        link: Scheme-less link such as
            ``www.npmjs.com/package/(name)/v/(version)`` (the name may be
            ``@scope/name``), or a ``registry.npmjs.org`` link.

    Returns:
        ``(True, url)`` on success. ``(False, '')`` when the link is on
        neither npm host, or when a segment is missing (the latter is logged
        as a warning).
    """
    ret = False
    new_link = ''

    try:
        if link.startswith(('www.npmjs.com/', 'registry.npmjs.org')):
            if link.startswith('registry.npmjs.org') and link.endswith('.tgz'):
                # Already a registry tarball link; rebuilding it from its
                # segments would yield "name-name-version.tgz.tgz".
                return True, 'https://' + link

            dn_loc_split = link.split('/')

            # www.npmjs.com links carry a leading "package" path segment.
            idx = 2 if dn_loc_split[1] == 'package' else 1

            if dn_loc_split[idx].startswith('@'):
                # Scoped package: "@scope/name"; the version sits one segment further.
                oss_name_npm = dn_loc_split[idx] + '/' + dn_loc_split[idx + 1]
                tar_name = dn_loc_split[idx + 1] + '-' + dn_loc_split[idx + 3]
            else:
                oss_name_npm = dn_loc_split[idx]
                tar_name = oss_name_npm + '-' + dn_loc_split[idx + 2]

            new_link = 'https://registry.npmjs.org/' + oss_name_npm + '/-/' + tar_name + '.tgz'
            ret = True

    except Exception as error:
        ret = False
        logger.warning('Cannot find the link for npm (url:%s) %s', link, error)

    return ret, new_link
142+
143+
144+
def get_download_location_for_pub(link):
    """Build the archive URL for a pub.dev package version.

    url format : ``pub.dev/packages/(oss_name)/versions/(oss_version)``
    download url format :
    ``https://storage.googleapis.com/pub-packages/packages/(oss_name)-(oss_version).tar.gz``

    Args:
        link: Scheme-less pub.dev link in the url format above.

    Returns:
        ``(True, url)`` on success. ``(False, '')`` when the link does not
        start with ``pub.dev/packages``, or when it does not follow the
        expected layout (the latter is logged as a warning).
    """
    ret = False
    new_link = ''

    try:
        if link.startswith('pub.dev/packages'):
            dn_loc_split = link.split('/')
            oss_name_pub = dn_loc_split[2]
            oss_version_pub = dn_loc_split[4]

            # Reject other pub.dev sub-pages and empty segments instead of
            # reporting a meaningless archive URL as a success.
            if dn_loc_split[3] != 'versions' or not oss_name_pub or not oss_version_pub:
                raise ValueError('not valid url for pub')

            new_link = 'https://storage.googleapis.com/pub-packages/packages/' + oss_name_pub + '-' + oss_version_pub + '.tar.gz'
            ret = True

    except Exception as error:
        ret = False
        # The message names pub (it previously said "npm", a copy-paste slip).
        logger.warning('Cannot find the link for pub (url:%s) %s', link, error)

    return ret, new_link

0 commit comments

Comments
 (0)