@@ -253,22 +253,27 @@ def urlparse(url: str = "", **kwargs: str | None) -> ParseResult:
253253 parsed_userinfo != "" or parsed_host != "" or parsed_port is not None
254254 )
255255 validate_path (path , has_scheme = has_scheme , has_authority = has_authority )
256- if has_authority :
256+ if has_scheme or has_authority :
257257 path = normalize_path (path )
258258
259259 # The GEN_DELIMS set is... : / ? # [ ] @
260260 # These do not need to be percent-quoted unless they serve as delimiters for the
261261 # specific component.
262+ WHATWG_SAFE = '`{}%|^\\ "'
262263
263264 # For 'path' we need to drop ? and # from the GEN_DELIMS set.
264- parsed_path : str = quote (path , safe = SUB_DELIMS + ":/[]@" )
265+ parsed_path : str = quote (path , safe = SUB_DELIMS + WHATWG_SAFE + ":/[]@" )
265266 # For 'query' we need to drop '#' from the GEN_DELIMS set.
266267 parsed_query : str | None = (
267- None if query is None else quote (query , safe = SUB_DELIMS + ":/?[]@" )
268+ None
269+ if query is None
270+ else quote (query , safe = SUB_DELIMS + WHATWG_SAFE + ":/?[]@" )
268271 )
269272 # For 'fragment' we can include all of the GEN_DELIMS set.
270273 parsed_fragment : str | None = (
271- None if fragment is None else quote (fragment , safe = SUB_DELIMS + ":/?#[]@" )
274+ None
275+ if fragment is None
276+ else quote (fragment , safe = SUB_DELIMS + WHATWG_SAFE + ":/?#[]@" )
272277 )
273278
274279 # The parsed ASCII bytestrings are our canonical form.
@@ -321,7 +326,8 @@ def encode_host(host: str) -> str:
321326 # From https://datatracker.ietf.org/doc/html/rfc3986/#section-3.2.2
322327 #
323328 # reg-name = *( unreserved / pct-encoded / sub-delims )
324- return quote (host .lower (), safe = SUB_DELIMS )
329+ WHATWG_SAFE = '"`{}%|\\ '
330+ return quote (host .lower (), safe = SUB_DELIMS + WHATWG_SAFE )
325331
326332 # IDNA hostnames
327333 try :
@@ -369,19 +375,17 @@ def validate_path(path: str, has_scheme: bool, has_authority: bool) -> None:
369375 # must either be empty or begin with a slash ("/") character."
370376 if path and not path .startswith ("/" ):
371377 raise InvalidURL ("For absolute URLs, path must be empty or begin with '/'" )
372- else :
378+
379+ if not has_scheme and not has_authority :
373380 # If a URI does not contain an authority component, then the path cannot begin
374381 # with two slash characters ("//").
375382 if path .startswith ("//" ):
376- raise InvalidURL (
377- "URLs with no authority component cannot have a path starting with '//'"
378- )
383+ raise InvalidURL ("Relative URLs cannot have a path starting with '//'" )
384+
379385 # In addition, a URI reference (Section 4.1) may be a relative-path reference,
380386 # in which case the first path segment cannot contain a colon (":") character.
381- if path .startswith (":" ) and not has_scheme :
382- raise InvalidURL (
383- "URLs with no scheme component cannot have a path starting with ':'"
384- )
387+ if path .startswith (":" ):
388+ raise InvalidURL ("Relative URLs cannot have a path starting with ':'" )
385389
386390
387391def normalize_path (path : str ) -> str :
0 commit comments