44import hashlib
55import logging
66import os
7+ from html .parser import HTMLParser
78
89import boto3
910import build
2021INDEX_FILE_NAME = 'index.html'
2122
2223
class PackageIndexHTMLParser(HTMLParser):
    """Class to parse package index html files.

    After feeding an index page to the parser, ``package_to_href`` maps the
    text of every ``<a>`` element (with surrounding whitespace removed) to the
    value of that element's ``href`` attribute, or ``None`` when the anchor
    has no non-empty ``href``.
    """

    def __init__(self):
        """Initialize html parser for a package index html.

        This class stores parameters to track the different links and packages stored in the index.
        """
        super().__init__()
        self.package_to_href = {}
        self._current_href = None
        self._in_a_tag = False

    def handle_starttag(self, tag, attrs):
        """Get current href if the tag is an 'a' tag."""
        if tag == 'a':
            self._in_a_tag = True
            # Always overwrite the tracked href: if a previous anchor was never
            # closed, its href must not leak onto an anchor that has none.
            self._current_href = dict(attrs).get('href') or None

    def handle_endtag(self, tag):
        """Reset tag information if the tag is an 'a' tag."""
        if tag == 'a' and self._in_a_tag:
            self._in_a_tag = False
            self._current_href = None

    def handle_data(self, data):
        """Record href and package name if in an 'a' tag."""
        if not self._in_a_tag:
            return

        # Index pages are often pretty-printed, so the anchor text may carry
        # newlines/indentation. Strip it so keys compare equal to plain file
        # names, and skip whitespace-only chunks entirely.
        name = data.strip()
        if name:
            self.package_to_href[name] = self._current_href
57+
2358def _set_version (version ):
2459 toml_path = os .path .join (PROJECT_PATH , 'pyproject.toml' )
2560 with open (toml_path , 'rb' ) as f :
@@ -44,8 +79,7 @@ def _load_local_index_file():
4479 return file
4580
4681
47- def _update_index_html (files , s3_client , dryrun = False ):
48- index_file_path = os .path .join (S3_PACKAGE_PATH , INDEX_FILE_NAME )
82+ def _get_index_file (s3_client , index_file_path , dryrun = False ):
4983 if not dryrun :
5084 try :
5185 response = s3_client .get_object (Bucket = BUCKET , Key = index_file_path )
@@ -57,6 +91,10 @@ def _update_index_html(files, s3_client, dryrun=False):
5791 else :
5892 current_index_file = _load_local_index_file ()
5993
94+ return current_index_file
95+
96+
97+ def _update_index_html (current_index_file , files , s3_client , index_file_path , dryrun = False ):
6098 insertion_point = current_index_file .find ('</body>' )
6199 current_text = current_index_file [:insertion_point ]
62100 text_list = [current_text ]
@@ -91,6 +129,12 @@ def _get_file_hash(filepath):
91129 return h .hexdigest ()
92130
93131
def _get_links(index_file):
    """Return the links found in a package index html document.

    Args:
        index_file (str):
            The html contents of the package index.

    Returns:
        dict:
            The ``package_to_href`` mapping built by ``PackageIndexHTMLParser``
            from the anchors in ``index_file``.
    """
    parser = PackageIndexHTMLParser()
    parser.feed(index_file)
    # feed() may keep trailing text buffered while waiting for more input;
    # close() forces it through the handlers so nothing is silently dropped.
    parser.close()
    return parser.package_to_href
136+
137+
94138def upload_package (dryrun = False ):
95139 """Uploads the built package to the S3 bucket.
96140
@@ -101,17 +145,23 @@ def upload_package(dryrun=False):
101145 s3_client = boto3 .client ('s3' )
102146 files = os .listdir ('dist' )
103147 files_to_hashes = {}
148+ index_file_path = os .path .join (S3_PACKAGE_PATH , INDEX_FILE_NAME )
149+ current_index_file = _get_index_file (s3_client , index_file_path , dryrun )
150+ links = _get_links (current_index_file )
104151 for file_name in files :
105152 dest = os .path .join (S3_PACKAGE_PATH , file_name )
106153 if dryrun :
107154 print (f'Uploading { file_name } as { dest } to bucket { BUCKET } ' ) # noqa: T201 `print` found
108155 else :
109- filepath = os .path .join ('dist' , file_name )
110- file_hash = _get_file_hash (filepath )
111- s3_client .upload_file (filepath , BUCKET , dest )
112- files_to_hashes [file_name ] = file_hash
113-
114- _update_index_html (files_to_hashes , s3_client , dryrun )
156+ if file_name not in links :
157+ filepath = os .path .join ('dist' , file_name )
158+ file_hash = _get_file_hash (filepath )
159+ s3_client .upload_file (filepath , BUCKET , dest )
160+ files_to_hashes [file_name ] = file_hash
161+ else :
162+ raise RuntimeError (f'The file { file_name } is already in this package index.' )
163+
164+ _update_index_html (current_index_file , files_to_hashes , s3_client , index_file_path , dryrun )
115165
116166
117167if __name__ == '__main__' :
0 commit comments