1010from __future__ import annotations
1111
1212import dataclasses
13- import pathlib
14- import urllib .parse
13+ import re
1514from functools import singledispatchmethod
1615from pathlib import Path
1716from typing import Sequence
2423)
2524from b2sdk .v3 .exception import B2Error
2625
27- from b2 ._internal ._utils .python_compat import removeprefix
26+ _B2ID_PATTERN = re .compile (r'^b2id://(?P<file_id>[a-zA-Z0-9:_-]+)$' , re .IGNORECASE )
27+ _B2_PATTERN = re .compile (r'^b2://(?P<bucket>[a-z0-9-]*)(?P<path>/.*)?$' , re .IGNORECASE )
28+ _SCHEME_PATTERN = re .compile (r'(?P<scheme>[a-z0-9]*)://.*' , re .IGNORECASE )
29+ _CONTROL_CHARACTERS_AND_SPACE = '\x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \t \n \x0b \x0c \r \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e \x1f '
2830
2931
3032class B2URIBase :
@@ -92,10 +94,10 @@ def parse_uri(uri: str, *, allow_all_buckets: bool = False) -> Path | B2URI | B2
9294 """
9395 if not uri :
9496 raise ValueError ('URI cannot be empty' )
95- parsed = urllib . parse . urlsplit ( uri )
96- if parsed . scheme == '' :
97- return pathlib . Path (uri )
98- return _parse_b2_uri (uri , parsed , allow_all_buckets = allow_all_buckets )
97+
98+ if _SCHEME_PATTERN . fullmatch ( _clean_uri ( uri )) :
99+ return _parse_b2_uri (uri , allow_all_buckets = allow_all_buckets )
100+ return Path (uri )
99101
100102
101103def parse_b2_uri (
@@ -110,38 +112,48 @@ def parse_b2_uri(
110112 :return: B2 URI
111113 :raises ValueError: if the URI is invalid
112114 """
113- parsed = urllib .parse .urlsplit (uri )
114- return _parse_b2_uri (uri , parsed , allow_all_buckets = allow_all_buckets , allow_b2id = allow_b2id )
115+ return _parse_b2_uri (uri , allow_all_buckets = allow_all_buckets , allow_b2id = allow_b2id )
116+
117+
118+ def _clean_uri (uri : str ) -> str :
119+ # Clean URI
120+ uri = uri .lstrip (_CONTROL_CHARACTERS_AND_SPACE )
121+ for i in ('\n ' , '\r ' , '\t ' ):
122+ uri = uri .replace (i , '' )
123+ return uri
115124
116125
117126def _parse_b2_uri (
118127 uri ,
119- parsed : urllib .parse .SplitResult ,
120128 * ,
121129 allow_all_buckets : bool = False ,
122130 allow_b2id : bool = True ,
123131) -> B2URI | B2FileIdURI :
124- if parsed .scheme in ('b2' , 'b2id' ):
125- path = urllib .parse .urlunsplit (parsed ._replace (scheme = '' , netloc = '' ))
126- if not parsed .netloc :
132+ uri = _clean_uri (uri )
133+ if uri .lower ().startswith ('b2://' ):
134+ match = _B2_PATTERN .fullmatch (uri )
135+ if not match :
136+ raise ValueError (f'Invalid B2 URI: { uri !r} ' )
137+
138+ bucket = match .group ('bucket' )
139+ path = match .group ('path' )
140+ if not bucket :
127141 if allow_all_buckets :
128142 if path :
129143 raise ValueError (
130144 f"Invalid B2 URI: all buckets URI doesn't allow non-empty path, but { path !r} was provided"
131145 )
132146 return B2URI (bucket_name = '' )
133- raise ValueError (f'Invalid B2 URI: { uri !r} ' )
134- elif parsed .password or parsed .username :
135- raise ValueError (
136- 'Invalid B2 URI: credentials passed using `user@password:` syntax is not supported in URI'
137- )
138-
139- if parsed .scheme == 'b2' :
140- return B2URI (bucket_name = parsed .netloc , path = removeprefix (path , '/' ))
141- elif parsed .scheme == 'b2id' and allow_b2id :
142- return B2FileIdURI (file_id = parsed .netloc )
143- else :
144- raise ValueError (f'Unsupported URI scheme: { parsed .scheme !r} ' )
147+ else :
148+ return B2URI (bucket_name = bucket , path = path [1 :] if path else '' )
149+ elif allow_b2id and uri .lower ().startswith ('b2id://' ):
150+ match = _B2ID_PATTERN .fullmatch (uri )
151+ if match :
152+ return B2FileIdURI (file_id = match .group ('file_id' ))
153+ elif match := _SCHEME_PATTERN .fullmatch (uri ):
154+ raise ValueError (f'Unsupported URI scheme: { match .group ("scheme" )!r} ' )
155+
156+ raise ValueError (f'Invalid B2 URI: { uri !r} ' )
145157
146158
147159class B2URIAdapter :
0 commit comments