1010from __future__ import annotations
1111
1212import dataclasses
13- import pathlib
13+ import re
1414from functools import singledispatchmethod
1515from pathlib import Path
1616from typing import Sequence
2323)
2424from b2sdk .v3 .exception import B2Error
2525
26- from b2 ._internal ._utils import uriparse
27- from b2 ._internal ._utils .python_compat import removeprefix
# ``b2id://<file_id>``: the file id is one or more ASCII letters, digits,
# ``:``, ``_`` or ``-``; nothing may follow it.
_B2ID_PATTERN = re.compile(
    r'^b2id://(?P<file_id>[a-zA-Z0-9:_-]+)$',
    flags=re.IGNORECASE,
)
# ``b2://<bucket>[/<path>]``: the bucket (possibly empty) consists of letters,
# digits and hyphens; the optional path group includes its leading ``/``.
_B2_PATTERN = re.compile(
    r'^b2://(?P<bucket>[a-z0-9-]*)(?P<path>/.*)?$',
    flags=re.IGNORECASE,
)
# Any ``<scheme>://...`` string whose scheme is alphanumeric (possibly empty);
# used to recognise URIs with a scheme other than the ones handled above.
_SCHEME_PATTERN = re.compile(
    r'(?P<scheme>[a-z0-9]*)://.*',
    flags=re.IGNORECASE,
)
2829
2930
3031class B2URIBase :
@@ -92,10 +93,7 @@ def parse_uri(uri: str, *, allow_all_buckets: bool = False) -> Path | B2URI | B2
9293 """
9394 if not uri :
9495 raise ValueError ('URI cannot be empty' )
95- parsed = uriparse .b2_urlsplit (uri )
96- if parsed .scheme == '' :
97- return pathlib .Path (uri )
98- return _parse_b2_uri (uri , parsed , allow_all_buckets = allow_all_buckets )
96+ return _parse_b2_uri (uri , allow_all_buckets = allow_all_buckets )
9997
10098
10199def parse_b2_uri (
@@ -110,33 +108,54 @@ def parse_b2_uri(
110108 :return: B2 URI
111109 :raises ValueError: if the URI is invalid
112110 """
113- parsed = uriparse .b2_urlsplit (uri )
114- return _parse_b2_uri (uri , parsed , allow_all_buckets = allow_all_buckets , allow_b2id = allow_b2id )
111+ return _parse_b2_uri (
112+ uri , allow_all_buckets = allow_all_buckets , allow_b2id = allow_b2id , allow_path = False
113+ )
115114
116115
# Characters stripped from the start of a URI before parsing: code points 0-32
# (the C0 control characters and space).
_URI_LEADING_STRIP_CHARS = ''.join(map(chr, range(33)))
# Translation table deleting line feeds, carriage returns and tabs anywhere
# in the URI in a single pass.
_URI_REMOVED_CHARS_TABLE = str.maketrans('', '', '\n\r\t')


def _parse_b2_uri(
    uri: str,
    *,
    allow_all_buckets: bool = False,
    allow_b2id: bool = True,
    allow_path: bool = True,
) -> B2URI | B2FileIdURI | Path:
    """
    Parse a string into a B2 URI object or, optionally, a local path.

    The input is sanitized before scheme detection: leading characters with
    code points 0-32 are stripped, and every line feed, carriage return and
    tab is removed. Scheme prefixes are matched case-insensitively.

    :param uri: string to parse
    :param allow_all_buckets: accept ``b2://`` with an empty bucket name (and no path)
    :param allow_b2id: accept ``b2id://<file_id>`` URIs
    :param allow_path: return the input as a ``Path`` when it carries no ``scheme://`` prefix
    :return: ``B2URI`` for ``b2://`` URIs, ``B2FileIdURI`` for ``b2id://`` URIs,
        otherwise ``Path`` built from the original, unsanitized input
    :raises ValueError: if the URI is malformed, uses an unsupported scheme, or is
        a plain path while ``allow_path`` is false
    """
    original_uri = uri
    uri = uri.lstrip(_URI_LEADING_STRIP_CHARS).translate(_URI_REMOVED_CHARS_TABLE)
    lowered = uri.lower()

    if lowered.startswith('b2://'):
        match = _B2_PATTERN.fullmatch(uri)
        if not match:
            raise ValueError(f'Invalid B2 URI: {uri!r}')

        bucket = match.group('bucket')
        path = match.group('path')
        if bucket:
            # The captured path keeps its leading '/', which is not part of the file name.
            return B2URI(bucket_name=bucket, path=path[1:] if path else '')

        # Empty bucket name: only meaningful as the "all buckets" URI.
        if not allow_all_buckets:
            raise ValueError(f'Invalid B2 URI: {uri!r}')
        if path:
            raise ValueError(
                f"Invalid B2 URI: all buckets URI doesn't allow non-empty path, but {path!r} was provided"
            )
        return B2URI(bucket_name='')

    if allow_b2id and lowered.startswith('b2id://'):
        match = _B2ID_PATTERN.fullmatch(uri)
        if not match:
            raise ValueError(f'Invalid B2 URI: {uri!r}')
        return B2FileIdURI(file_id=match.group('file_id'))

    # Anything else shaped like "<scheme>://..." is rejected; this includes
    # "b2id://" when allow_b2id is false.
    scheme_match = _SCHEME_PATTERN.fullmatch(uri)
    if scheme_match:
        scheme = scheme_match.group('scheme')
        raise ValueError(f'Unsupported URI scheme: {scheme!r}')

    if allow_path:
        # Local paths are built from the caller's exact input, not the sanitized copy.
        return Path(original_uri)

    raise ValueError(f'Invalid B2 URI: {uri!r}')
140159
141160
142161class B2URIAdapter :
0 commit comments