3535 Union ,
3636 cast ,
3737)
38- from urllib .parse import urljoin , urlparse
38+ from urllib .parse import ParseResult , urljoin , urlparse , urlunparse
3939
4040from playwright ._impl ._api_structures import NameValue
4141from playwright ._impl ._errors import (
@@ -210,8 +210,12 @@ def map_token(original: str, replacement: str) -> str:
210210 # Handle special case of http*://, note that the new schema has to be
211211 # a web schema so that slashes are properly inserted after domain.
212212 if index == 0 and token .endswith (":" ):
213- # Using a simple replacement for the scheme part
214- processed_parts .append (map_token (token , "http:" ))
213+ # Replace any pattern with http:
214+ if "*" in token or "{" in token :
215+ processed_parts .append (map_token (token , "http:" ))
216+ else :
217+ # Preserve explicit schema as is as it may affect trailing slashes after domain.
218+ processed_parts .append (token )
215219 continue
216220 question_index = token .find ("?" )
217221 if question_index == - 1 :
@@ -222,55 +226,49 @@ def map_token(original: str, replacement: str) -> str:
222226 processed_parts .append (new_prefix + new_suffix )
223227
224228 relative_path = "/" .join (processed_parts )
225- resolved_url , case_insensitive_part = resolve_base_url (base_url , relative_path )
229+ resolved , case_insensitive_part = resolve_base_url (base_url , relative_path )
226230
227- for replacement , original in token_map .items ():
228- normalize = case_insensitive_part and replacement in case_insensitive_part
229- resolved_url = resolved_url .replace (
230- replacement , original .lower () if normalize else original , 1
231+ for token , original in token_map .items ():
232+ normalize = case_insensitive_part and token in case_insensitive_part
233+ resolved = resolved .replace (
234+ token , original .lower () if normalize else original , 1
231235 )
232236
233- return ensure_trailing_slash ( resolved_url )
237+ return resolved
234238
235239
def resolve_base_url(
    base_url: Optional[str], given_url: str
) -> Tuple[str, Optional[str]]:
    """Resolve ``given_url`` against ``base_url`` and describe its case-insensitive part.

    Args:
        base_url: Base to join against; ``None`` is treated as an empty base.
        given_url: Absolute or relative URL to resolve.

    Returns:
        A ``(resolved, case_insensitive_prefix)`` tuple. ``resolved`` is the
        joined URL re-serialized after ``nodelike_urlparse`` has patched it.
        ``case_insensitive_prefix`` is ``"<scheme>://<hostname>[:<port>]"``.
        If joining or parsing raises, ``(given_url, None)`` is returned instead.
    """
    try:
        url = nodelike_urlparse(
            urljoin(base_url if base_url is not None else "", given_url)
        )
        resolved = urlunparse(url)
        # Schema and domain are case-insensitive.
        # url.netloc can't be used here because it also includes userinfo
        # (username:password).
        hostname_port = url.hostname or ""
        # Compare against None so that an explicit port 0 is not dropped.
        if url.port is not None:
            hostname_port += f":{url.port}"
        case_insensitive_prefix = f"{url.scheme}://{hostname_port}"
        return resolved, case_insensitive_prefix
    except Exception:
        # Best effort: an unparsable URL (e.g. an invalid port) is handed back
        # unchanged, with no case-insensitive prefix.
        return given_url, None
252258
253259
254- # In Node.js, new URL('http://localhost') returns 'http://localhost/'.
255- # To ensure the same url matching behavior, do the same.
256- def ensure_trailing_slash (url : str ) -> str :
257- split = url .split ("://" , maxsplit = 1 )
258- if len (split ) == 2 :
259- # URL parser doesn't like strange/unknown schemes, so we replace it for parsing, then put it back
260- parsable_url = "http://" + split [1 ]
261- else :
262- # Given current rules, this should never happen _and_ still be a valid matcher. We require the protocol to be part of the match,
263- # so either the user is using a glob that starts with "*" (and none of this code is running), or the user actually has `something://` in `match`
264- parsable_url = url
265- parsed = urlparse (parsable_url , allow_fragments = True )
266- if len (split ) == 2 :
267- # Replace the scheme that we removed earlier
268- parsed = parsed ._replace (scheme = split [0 ])
269- if parsed .path == "" :
270- parsed = parsed ._replace (path = "/" )
271- url = parsed .geturl ()
272-
273- return url
def nodelike_urlparse(url: str) -> ParseResult:
    """Parse ``url`` with urllib, patched to serialize like Node.js ``new URL()``.

    For the WHATWG "special" schemes an empty path is replaced by ``"/"``, so
    that e.g. ``http://localhost`` serializes as ``http://localhost/``. URLs
    with any other scheme are returned exactly as ``urlparse`` produced them.

    Args:
        url: The URL string to parse.

    Returns:
        The ``ParseResult``, with ``path`` set to ``"/"`` when the scheme is
        special and the parsed path was empty.
    """
    parsed = urlparse(url, allow_fragments=True)

    # https://url.spec.whatwg.org/#special-scheme
    is_special_url = parsed.scheme in ("http", "https", "ws", "wss", "ftp", "file")
    # Special urls have a list path, and list paths are serialized as described in
    # https://url.spec.whatwg.org/#url-path-serializer
    # urllib diverges (it keeps the empty path), so we patch it here.
    if is_special_url and parsed.path == "":
        parsed = parsed._replace(path="/")

    return parsed
274272
275273
276274class HarLookupResult (TypedDict , total = False ):
0 commit comments