@@ -31,6 +31,7 @@ def simpleapi_download(
         parallel_download = True,
         read_simpleapi = None,
         get_auth = None,
+        _print = print,
         _fail = fail):
     """Download Simple API HTML.
 
@@ -43,6 +44,8 @@ def simpleapi_download(
              separate packages.
            * extra_index_urls: Extra index URLs that will be looked up after
              the main is looked up.
+           * index_strategy: str, how to query multiple indexes. Either "first-index"
+             (the first index that has a package wins) or "unsafe" (merge all indexes).
            * sources: list[str], the sources to download things for. Each value is
              the contents of requirements files.
            * envsubst: list[str], the envsubst vars for performing substitution in index url.
@@ -61,6 +64,7 @@ def simpleapi_download(
         read_simpleapi: a function for reading and parsing of the SimpleAPI contents.
             Used in tests.
         get_auth: A function to get auth information passed to read_simpleapi. Used in tests.
+        _print: a function to print. Used in tests.
         _fail: a function to print a failure. Used in tests.
 
     Returns:
@@ -71,6 +75,9 @@ def simpleapi_download(
         for p, i in (attr.index_url_overrides or {}).items()
     }
 
+    if attr.index_strategy not in ["unsafe", "first-index"]:
+        fail("Unsupported index_strategy '{}', allowed values: 'first-index', 'unsafe'".format(attr.index_strategy))
+
     download_kwargs = {}
     if bazel_features.external_deps.download_has_block_param:
         download_kwargs["block"] = not parallel_download
@@ -80,68 +87,114 @@ def simpleapi_download(
     contents = {}
     index_urls = [attr.index_url] + attr.extra_index_urls
     read_simpleapi = read_simpleapi or _read_simpleapi
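+    # Normalize every requested package name once up front so that lookups in
+    # `index_url_overrides` and `contents` use the same canonical form.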
+    sources = {
+        pkg: normalize_name(pkg)
+        for pkg in attr.sources
+    }
 
-    found_on_index = {}
+    found_on_indexes = {}
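+    # Maps pkg -> list of index URLs it was found on; with the "unsafe"
+    # strategy a package may be served by more than one index.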
     warn_overrides = False
     for i, index_url in enumerate(index_urls):
         if i != 0:
             # Warn the user about a potential fix for the overrides
             warn_overrides = True
 
         async_downloads = {}
-        sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
-        for pkg in sources:
+        for pkg, pkg_normalized in sources.items():
+            if pkg not in found_on_indexes:
+                # We have not found the pkg yet, so search this index for it.
+                pass
+            elif attr.index_strategy == "first-index":
+                # We have already found the pkg and the strategy is safe, so
+                # do not search any further.
+                continue
+            elif pkg_normalized in index_url_overrides:
+                # This pkg has been overridden; be strict and use the
+                # `first-index` strategy implicitly.
+                continue
+            elif attr.index_strategy == "unsafe":
+                # Keep searching the remaining indexes for the pkg.
+                pass
+            else:
+                fail("BUG: unknown state when searching indexes for packages")
+
-            pkg_normalized = normalize_name(pkg)
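+            # An override value may list several index URLs separated by
+            # commas; query each of them and merge the results.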
-            result = read_simpleapi(
-                ctx = ctx,
-                url = "{}/{}/".format(
-                    index_url_overrides.get(pkg_normalized, index_url).rstrip("/"),
-                    pkg,
-                ),
-                attr = attr,
-                cache = cache,
-                get_auth = get_auth,
-                **download_kwargs
-            )
-            if hasattr(result, "wait"):
-                # We will process it in a separate loop:
-                async_downloads[pkg] = struct(
-                    pkg_normalized = pkg_normalized,
-                    wait = result.wait,
+            override_urls = index_url_overrides.get(pkg_normalized, index_url)
+            for url in override_urls.split(","):
+                result = read_simpleapi(
+                    ctx = ctx,
+                    url = "{}/{}/".format(
+                        url.rstrip("/"),
+                        pkg,
+                    ),
+                    attr = attr,
+                    cache = cache,
+                    get_auth = get_auth,
+                    **download_kwargs
                 )
-            elif result.success:
-                contents[pkg_normalized] = result.output
-                found_on_index[pkg] = index_url
+                if hasattr(result, "wait"):
+                    # We will process it in a separate loop:
+                    async_downloads.setdefault(pkg, []).append(
+                        struct(
+                            pkg_normalized = pkg_normalized,
+                            url = url,
+                            wait = result.wait,
+                        ),
+                    )
+                elif result.success:
+                    current = contents.get(
+                        pkg_normalized,
+                        struct(sdists = {}, whls = {}),
+                    )
+                    contents[pkg_normalized] = struct(
+                        # Always prefer the current values, so that the first index wins
+                        sdists = result.output.sdists | current.sdists,
+                        whls = result.output.whls | current.whls,
+                    )
+                    found_on_indexes.setdefault(pkg, []).append(url)
 
         if not async_downloads:
             continue
 
         # If we use `block` == False, then we need to have a second loop that is
         # collecting all of the results as they were being downloaded in parallel.
-        for pkg, download in async_downloads.items():
-            result = download.wait()
-
-            if result.success:
-                contents[download.pkg_normalized] = result.output
-                found_on_index[pkg] = index_url
-
-    failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
+        for pkg, downloads in async_downloads.items():
+            for download in downloads:
+                result = download.wait()
+
+                if result.success:
+                    current = contents.get(
+                        download.pkg_normalized,
+                        struct(sdists = {}, whls = {}),
+                    )
+                    contents[download.pkg_normalized] = struct(
+                        # Always prefer the current values, so that the first index wins
+                        sdists = result.output.sdists | current.sdists,
+                        whls = result.output.whls | current.whls,
+                    )
+                    found_on_indexes.setdefault(pkg, []).append(download.url)
+
+    failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_indexes]
     if failed_sources:
-        _fail("Failed to download metadata for {} for from urls: {}".format(
+        _fail("Failed to download metadata for {} from urls: {}".format(
             failed_sources,
             index_urls,
         ))
         return None
 
     if warn_overrides:
         index_url_overrides = {
-            pkg: found_on_index[pkg]
+            pkg: ",".join(found_on_indexes[pkg])
             for pkg in attr.sources
-            if found_on_index[pkg] != attr.index_url
+            if found_on_indexes[pkg] != [attr.index_url]
         }
 
-        # buildifier: disable=print
-        print("You can use the following `index_url_overrides` to avoid the 404 warnings:\n{}".format(
+        _print("You can use the following `index_url_overrides` to avoid the 404 warnings:\n{}".format(
             render.dict(index_url_overrides),
         ))
 