
Commit fac91c3

Use GH API in download script since the web interface cannot be scraped without JS anymore.
1 parent 2667089 commit fac91c3
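
For context, the change swaps HTML scraping for the GitHub Releases API, which returns JSON without requiring JavaScript. A minimal sketch of that API call, assuming GitHub's documented releases endpoint and asset fields; the release tag below is only an illustration, not a value taken from this commit:

import json
from urllib.request import Request, urlopen

# Ask the Releases API for a specific tag instead of scraping the HTML release page.
# "0.8.2" is an illustrative tag, not taken from this commit.
url = "https://api.github.com/repos/scoder/fastrlock/releases/tags/0.8.2"
request = Request(url, headers={'Accept': 'application/vnd.github+json'})
with urlopen(request) as response:
    release = json.loads(response.read().decode('utf-8'))

# Each release asset carries a direct download URL for the uploaded wheel or sdist.
for asset in release.get('assets', ()):
    print(asset['browser_download_url'])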

File tree

1 file changed: +37 -8 lines changed


download_artefacts.py

Lines changed: 37 additions & 8 deletions
@@ -9,24 +9,23 @@
 
 from concurrent.futures import ProcessPoolExecutor as Pool, as_completed
 from pathlib import Path
-from urllib.request import urlopen
+from urllib.request import urlopen, Request
 from urllib.parse import urljoin
 
 logger = logging.getLogger()
 
 PARALLEL_DOWNLOADS = 6
-GITHUB_PACKAGE_URL = "https://github.com/scoder/fastrlock"
+GITHUB_API_URL = "https://api.github.com/repos/scoder/fastrlock"
 APPVEYOR_PACKAGE_URL = "https://ci.appveyor.com/api/projects/scoder/fastrlock"
 APPVEYOR_BUILDJOBS_URL = "https://ci.appveyor.com/api/buildjobs"
 
 
-def find_github_files(version, base_package_url=GITHUB_PACKAGE_URL):
-    url = f"{base_package_url}/releases/tag/{version}"
-    with urlopen(url) as p:
-        page = p.read().decode()
+def find_github_files(version, api_url=GITHUB_API_URL):
+    url = f"{api_url}/releases/tags/{version}"
+    release, _ = read_url(url, accept="application/vnd.github+json", as_json=True)
 
-    for wheel_url, _ in itertools.groupby(sorted(re.findall(r'href="([^"]+\.whl)"', page))):
-        yield urljoin(base_package_url, wheel_url)
+    for asset in release.get('assets', ()):
+        yield asset['browser_download_url']
 
 
 def find_appveyor_files(version, base_package_url=APPVEYOR_PACKAGE_URL, base_job_url=APPVEYOR_BUILDJOBS_URL):
@@ -55,6 +54,36 @@ def find_appveyor_files(version, base_package_url=APPVEYOR_PACKAGE_URL, base_job_url=APPVEYOR_BUILDJOBS_URL):
                 yield urljoin(artifacts_url, artifact['fileName'])
 
 
+def read_url(url, decode=True, accept=None, as_json=False):
+    if accept:
+        request = Request(url, headers={'Accept': accept})
+    else:
+        request = Request(url)
+
+    with urlopen(request) as res:
+        charset = _find_content_encoding(res)
+        content_type = res.headers.get('Content-Type')
+        data = res.read()
+
+    if decode:
+        data = data.decode(charset)
+    if as_json:
+        data = json.loads(data)
+    return data, content_type
+
+
+def _find_content_encoding(response, default='iso8859-1'):
+    from email.message import Message
+    content_type = response.headers.get('Content-Type')
+    if content_type:
+        msg = Message()
+        msg.add_header('Content-Type', content_type)
+        charset = msg.get_content_charset(default)
+    else:
+        charset = default
+    return charset
+
+
 def download1(wheel_url, dest_dir):
     wheel_name = wheel_url.rsplit("/", 1)[1]
     logger.info(f"Downloading {wheel_url} ...")
