Skip to content

Commit ac9a717

Browse files
committed
Not uploading files if the package is already in the index
1 parent 903ce7c commit ac9a717

File tree

1 file changed

+58
-8
lines changed

1 file changed

+58
-8
lines changed

scripts/create_download_tracker.py

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import hashlib
55
import logging
66
import os
7+
from html.parser import HTMLParser
78

89
import boto3
910
import build
@@ -20,6 +21,40 @@
2021
INDEX_FILE_NAME = 'index.html'
2122

2223

24+
class PackageIndexHTMLParser(HTMLParser):
    """Parse a package index html file into a mapping of link text to href.

    After ``feed`` has been called, ``package_to_href`` maps the text found
    inside each 'a' tag (with surrounding whitespace removed) to the ``href``
    attribute of that tag, or to ``None`` when the tag has no usable ``href``.
    """

    def __init__(self):
        """Initialize html parser for a package index html.

        Attributes:
            package_to_href (dict):
                Mapping of the text inside each 'a' tag to the tag's href.
        """
        super().__init__()
        self.package_to_href = {}
        self._current_href = None
        self._in_a_tag = False

    def handle_starttag(self, tag, attrs):
        """Remember the href of the 'a' tag that is being opened.

        Args:
            tag (str):
                Lower-cased name of the tag being opened.
            attrs (list[tuple]):
                ``(name, value)`` pairs for the attributes of the tag.
        """
        if tag != 'a':
            return

        self._in_a_tag = True
        # Always overwrite the stored href: if a previous 'a' tag was never
        # closed, keeping its value would attach the wrong link to this text.
        self._current_href = dict(attrs).get('href') or None

    def handle_endtag(self, tag):
        """Reset tag information if the tag is an 'a' tag.

        Args:
            tag (str):
                Lower-cased name of the tag being closed.
        """
        if tag == 'a' and self._in_a_tag:
            self._in_a_tag = False
            self._current_href = None

    def handle_data(self, data):
        """Record href and package name if in an 'a' tag.

        Args:
            data (str):
                Text found between tags.
        """
        if not self._in_a_tag:
            return

        # Index pages are often pretty-printed, so the link text may carry
        # newlines or indentation. Strip it so lookups by file name succeed and
        # ignore whitespace-only text nodes entirely.
        package_name = data.strip()
        if package_name:
            self.package_to_href[package_name] = self._current_href
56+
57+
2358
def _set_version(version):
2459
toml_path = os.path.join(PROJECT_PATH, 'pyproject.toml')
2560
with open(toml_path, 'rb') as f:
@@ -44,8 +79,7 @@ def _load_local_index_file():
4479
return file
4580

4681

47-
def _update_index_html(files, s3_client, dryrun=False):
48-
index_file_path = os.path.join(S3_PACKAGE_PATH, INDEX_FILE_NAME)
82+
def _get_index_file(s3_client, index_file_path, dryrun=False):
4983
if not dryrun:
5084
try:
5185
response = s3_client.get_object(Bucket=BUCKET, Key=index_file_path)
@@ -57,6 +91,10 @@ def _update_index_html(files, s3_client, dryrun=False):
5791
else:
5892
current_index_file = _load_local_index_file()
5993

94+
return current_index_file
95+
96+
97+
def _update_index_html(current_index_file, files, s3_client, index_file_path, dryrun=False):
6098
insertion_point = current_index_file.find('</body>')
6199
current_text = current_index_file[:insertion_point]
62100
text_list = [current_text]
@@ -91,6 +129,12 @@ def _get_file_hash(filepath):
91129
return h.hexdigest()
92130

93131

132+
def _get_links(index_file):
    """Return the link text to href mapping parsed from a package index.

    Args:
        index_file (str):
            Contents of the package index html file.

    Returns:
        dict:
            The ``package_to_href`` mapping collected by ``PackageIndexHTMLParser``.
    """
    index_parser = PackageIndexHTMLParser()
    index_parser.feed(index_file)
    links_by_package = index_parser.package_to_href
    return links_by_package
136+
137+
94138
def upload_package(dryrun=False):
95139
"""Uploads the built package to the S3 bucket.
96140
@@ -101,17 +145,23 @@ def upload_package(dryrun=False):
101145
s3_client = boto3.client('s3')
102146
files = os.listdir('dist')
103147
files_to_hashes = {}
148+
index_file_path = os.path.join(S3_PACKAGE_PATH, INDEX_FILE_NAME)
149+
current_index_file = _get_index_file(s3_client, index_file_path, dryrun)
150+
links = _get_links(current_index_file)
104151
for file_name in files:
105152
dest = os.path.join(S3_PACKAGE_PATH, file_name)
106153
if dryrun:
107154
print(f'Uploading {file_name} as {dest} to bucket {BUCKET}') # noqa: T201 `print` found
108155
else:
109-
filepath = os.path.join('dist', file_name)
110-
file_hash = _get_file_hash(filepath)
111-
s3_client.upload_file(filepath, BUCKET, dest)
112-
files_to_hashes[file_name] = file_hash
113-
114-
_update_index_html(files_to_hashes, s3_client, dryrun)
156+
if file_name not in links:
157+
filepath = os.path.join('dist', file_name)
158+
file_hash = _get_file_hash(filepath)
159+
s3_client.upload_file(filepath, BUCKET, dest)
160+
files_to_hashes[file_name] = file_hash
161+
else:
162+
raise RuntimeError(f'The file {file_name} is already in this package index.')
163+
164+
_update_index_html(current_index_file, files_to_hashes, s3_client, index_file_path, dryrun)
115165

116166

117167
if __name__ == '__main__':

0 commit comments

Comments
 (0)