@@ -23,7 +23,81 @@ load("//python/private:normalize_name.bzl", "normalize_name")
2323load ("//python/private:text_util.bzl" , "render" )
2424load (":parse_simpleapi_html.bzl" , "parse_simpleapi_html" )
2525
26- def  simpleapi_download (ctx , * , attr , cache , parallel_download  =  True ):
def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
    """Fetch a single SimpleAPI index page and hand it over for parsing.

    Args:
        ctx: The module_ctx or repository_ctx.
        url: str, the index URL; it may contain env var references that are
            substituted using `attr.envsubst` before downloading.
        attr: The attribute that contains necessary info for downloading. The
          following attributes must be present:
           * envsubst: The names of the env vars that may be substituted in
               the URL.
           * netrc: The netrc parameter for ctx.download, see http_file for docs.
           * auth_patterns: The auth_patterns parameter for ctx.download, see
               http_file for docs.
          NOTE(review): netrc and auth_patterns are not read here directly;
          presumably `get_auth` consumes them via `ctx_attr` - confirm.
        cache: A dict keyed by the substituted URL. A hit short-circuits the
            download; the dict is also forwarded to `_read_index_result`.
        **download_kwargs: Any extra params to ctx.download.
            Note that url, output, auth and allow_fail will be passed for you.

    Returns:
        One of:
         * On a cache hit: `struct(success = True, output = <cached value>)`.
         * If `block = False` was passed in download_kwargs: a struct with a
           `wait` function, mirroring the non-blocking ctx.download API.
           Calling it waits for the download and returns the value of
           `_read_index_result`.
         * Otherwise: the value of `_read_index_result` for the finished
           download.
    """
    # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for
    # the whl location and we cannot handle multiple URLs at once by passing
    # them to ctx.download if we want to correctly handle the relative URLs.
    # TODO: Add a test that env subbed index urls do not leak into the lock file.

    getenv = ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get
    resolved_url = envsubst(url, attr.envsubst, getenv)

    # The fully substituted URL doubles as the cache key.
    if resolved_url in cache:
        return struct(success = True, output = cache[resolved_url])

    # Build the output file name from the env var *names* rather than their
    # values - this will be unique over the lifetime of the execution of this
    # function and we also use `~` as the separator to ensure that we don't
    # get clashes.
    placeholders = {name: "~{}~".format(name) for name in attr.envsubst}
    file_stem = envsubst(url, attr.envsubst, placeholders.get)

    # Transform the URL into a valid filename
    for separator in [".", ":", "/", "\\", "-"]:
        file_stem = file_stem.replace(separator, "_")
    html_path = ctx.path(file_stem.strip("_").lower() + ".html")

    # NOTE: this may have block = True or block = False in the download_kwargs
    pending = ctx.download(
        url = [resolved_url],
        output = html_path,
        auth = get_auth(ctx, [resolved_url], ctx_attr = attr),
        allow_fail = True,
        **download_kwargs
    )

    if download_kwargs.get("block") != False:
        # Blocking download: the result is already available.
        return _read_index_result(ctx, pending, html_path, resolved_url, cache, resolved_url)

    # Simulate the same API as ctx.download has
    return struct(
        wait = lambda: _read_index_result(
            ctx,
            pending.wait(),
            html_path,
            resolved_url,
            cache,
            resolved_url,
        ),
    )
92+ 
93+ def  simpleapi_download (
94+         ctx ,
95+         * ,
96+         attr ,
97+         cache ,
98+         parallel_download  =  True ,
99+         read_simpleapi  =  _read_simpleapi ,
100+         _fail  =  fail ):
27101    """Download Simple API HTML. 
28102
29103    Args: 
@@ -50,6 +124,9 @@ def simpleapi_download(ctx, *, attr, cache, parallel_download = True):
50124            reflected when re-evaluating the extension unless we do 
51125            `bazel clean --expunge`. 
52126        parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads. 
127+         read_simpleapi: a function for reading and parsing of the SimpleAPI contents. 
128+             Used in tests. 
129+         _fail: a function to print a failure. Used in tests. 
53130
54131    Returns: 
55132        dict of pkg name to the parsed HTML contents - a list of structs. 
@@ -79,7 +156,7 @@ def simpleapi_download(ctx, *, attr, cache, parallel_download = True):
79156        sources  =  [pkg  for  pkg  in  attr .sources  if  pkg  not  in   found_on_index ]
80157        for  pkg  in  sources :
81158            pkg_normalized  =  normalize_name (pkg )
82-             result  =  _read_simpleapi (
159+             result  =  read_simpleapi (
83160                ctx  =  ctx ,
84161                url  =  "{}/{}/" .format (
85162                    index_url_overrides .get (pkg_normalized , index_url ).rstrip ("/" ),
@@ -95,7 +172,7 @@ def simpleapi_download(ctx, *, attr, cache, parallel_download = True):
95172                    pkg_normalized  =  pkg_normalized ,
96173                    wait  =  result .wait ,
97174                )
98-             else :
175+             elif   result . success :
99176                contents [pkg_normalized ] =  result .output 
100177                found_on_index [pkg ] =  index_url 
101178
@@ -113,10 +190,11 @@ def simpleapi_download(ctx, *, attr, cache, parallel_download = True):
113190
114191    failed_sources  =  [pkg  for  pkg  in  attr .sources  if  pkg  not  in   found_on_index ]
115192    if  failed_sources :
116-         fail ("Failed to download metadata for {} for from urls: {}" .format (
193+         _fail ("Failed to download metadata for {} for from urls: {}" .format (
117194            failed_sources ,
118195            index_urls ,
119196        ))
197+         return  None 
120198
121199    if  warn_overrides :
122200        index_url_overrides  =  {
@@ -132,73 +210,6 @@ def simpleapi_download(ctx, *, attr, cache, parallel_download = True):
132210
133211    return  contents 
134212
135- def  _read_simpleapi (ctx , url , attr , cache , ** download_kwargs ):
136-     """Read SimpleAPI. 
137- 
138-     Args: 
139-         ctx: The module_ctx or repository_ctx. 
140-         url: str, the url parameter that can be passed to ctx.download. 
141-         attr: The attribute that contains necessary info for downloading. The 
142-           following attributes must be present: 
143-            * envsubst: The envsubst values for performing substitutions in the URL. 
144-            * netrc: The netrc parameter for ctx.download, see http_file for docs. 
145-            * auth_patterns: The auth_patterns parameter for ctx.download, see 
146-                http_file for docs. 
147-         cache: A dict for storing the results. 
148-         **download_kwargs: Any extra params to ctx.download. 
149-             Note that output and auth will be passed for you. 
150- 
151-     Returns: 
152-         A similar object to what `download` would return except that in result.out 
153-         will be the parsed simple api contents. 
154-     """ 
155-     # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for 
156-     # the whl location and we cannot handle multiple URLs at once by passing 
157-     # them to ctx.download if we want to correctly handle the relative URLs. 
158-     # TODO: Add a test that env subbed index urls do not leak into the lock file. 
159- 
160-     real_url  =  envsubst (
161-         url ,
162-         attr .envsubst ,
163-         ctx .getenv  if  hasattr (ctx , "getenv" ) else  ctx .os .environ .get ,
164-     )
165- 
166-     cache_key  =  real_url 
167-     if  cache_key  in  cache :
168-         return  struct (success  =  True , output  =  cache [cache_key ])
169- 
170-     output_str  =  envsubst (
171-         url ,
172-         attr .envsubst ,
173-         # Use env names in the subst values - this will be unique over 
174-         # the lifetime of the execution of this function and we also use 
175-         # `~` as the separator to ensure that we don't get clashes. 
176-         {e : "~{}~" .format (e ) for  e  in  attr .envsubst }.get ,
177-     )
178- 
179-     # Transform the URL into a valid filename 
180-     for  char  in  ["." , ":" , "/" , "\\ " , "-" ]:
181-         output_str  =  output_str .replace (char , "_" )
182- 
183-     output  =  ctx .path (output_str .strip ("_" ).lower () +  ".html" )
184- 
185-     # NOTE: this may have block = True or block = False in the download_kwargs 
186-     download  =  ctx .download (
187-         url  =  [real_url ],
188-         output  =  output ,
189-         auth  =  get_auth (ctx , [real_url ], ctx_attr  =  attr ),
190-         allow_fail  =  True ,
191-         ** download_kwargs 
192-     )
193- 
194-     if  download_kwargs .get ("block" ) ==  False :
195-         # Simulate the same API as ctx.download has 
196-         return  struct (
197-             wait  =  lambda : _read_index_result (ctx , download .wait (), output , real_url , cache , cache_key ),
198-         )
199- 
200-     return  _read_index_result (ctx , download , output , real_url , cache , cache_key )
201- 
202213def  _read_index_result (ctx , result , output , url , cache , cache_key ):
203214    if  not  result .success :
204215        return  struct (success  =  False )
0 commit comments