Skip to content

Commit 6c54c51

Browse files
Replace urlsplit with a new parser
1 parent 079c9a3 commit 6c54c51

File tree

4 files changed

+90
-14
lines changed

4 files changed

+90
-14
lines changed

b2/_internal/_utils/uri.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111

1212
import dataclasses
1313
import pathlib
14-
import urllib.parse
1514
from functools import singledispatchmethod
1615
from pathlib import Path
1716
from typing import Sequence
@@ -24,6 +23,7 @@
2423
)
2524
from b2sdk.v3.exception import B2Error
2625

26+
from b2._internal._utils import uriparse
2727
from b2._internal._utils.python_compat import removeprefix
2828

2929

@@ -92,7 +92,7 @@ def parse_uri(uri: str, *, allow_all_buckets: bool = False) -> Path | B2URI | B2
9292
"""
9393
if not uri:
9494
raise ValueError('URI cannot be empty')
95-
parsed = urllib.parse.urlsplit(uri)
95+
parsed = uriparse.b2_urlsplit(uri)
9696
if parsed.scheme == '':
9797
return pathlib.Path(uri)
9898
return _parse_b2_uri(uri, parsed, allow_all_buckets=allow_all_buckets)
@@ -110,34 +110,29 @@ def parse_b2_uri(
110110
:return: B2 URI
111111
:raises ValueError: if the URI is invalid
112112
"""
113-
parsed = urllib.parse.urlsplit(uri)
113+
parsed = uriparse.b2_urlsplit(uri)
114114
return _parse_b2_uri(uri, parsed, allow_all_buckets=allow_all_buckets, allow_b2id=allow_b2id)
115115

116116

117117
def _parse_b2_uri(
118118
uri,
119-
parsed: urllib.parse.SplitResult,
119+
parsed: uriparse.SplitB2Result,
120120
*,
121121
allow_all_buckets: bool = False,
122122
allow_b2id: bool = True,
123123
) -> B2URI | B2FileIdURI:
124124
if parsed.scheme in ('b2', 'b2id'):
125-
path = urllib.parse.urlunsplit(parsed._replace(scheme='', netloc=''))
126125
if not parsed.netloc:
127126
if allow_all_buckets:
128-
if path:
127+
if parsed.path:
129128
raise ValueError(
130-
f"Invalid B2 URI: all buckets URI doesn't allow non-empty path, but {path!r} was provided"
129+
f"Invalid B2 URI: all buckets URI doesn't allow non-empty path, but {parsed.path!r} was provided"
131130
)
132131
return B2URI(bucket_name='')
133132
raise ValueError(f'Invalid B2 URI: {uri!r}')
134-
elif parsed.password or parsed.username:
135-
raise ValueError(
136-
'Invalid B2 URI: credentials passed using `user@password:` syntax is not supported in URI'
137-
)
138133

139134
if parsed.scheme == 'b2':
140-
return B2URI(bucket_name=parsed.netloc, path=removeprefix(path, '/'))
135+
return B2URI(bucket_name=parsed.netloc, path=removeprefix(parsed.path, '/'))
141136
elif parsed.scheme == 'b2id' and allow_b2id:
142137
return B2FileIdURI(file_id=parsed.netloc)
143138
else:

b2/_internal/_utils/uriparse.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
######################################################################
2+
#
3+
# File: b2/_internal/_utils/uriparse.py
4+
#
5+
# Copyright 2025 Backblaze Inc. All Rights Reserved.
6+
#
7+
# License https://www.backblaze.com/using_b2_code.html
8+
#
9+
######################################################################
10+
import re
11+
12+
# Every C0 control character (U+0000..U+001F) followed by the space character;
# used to strip leading junk from a URI before it is matched.
_CONTROL_CHARACTERS_AND_SPACE = ''.join(map(chr, range(0x21)))

# Grammar of a B2 URI / local path.  There is deliberately no query or
# fragment component: once the path starts, every character (including
# ``?`` and ``#``) belongs to it.
_B2_URL_RE = re.compile(
    r"""
    (
        (?P<scheme>[a-z0-9]+)
        ://
    )?                       # the delimiter is optional if there is no scheme defined
    (?P<netloc>[-a-z0-9]*)   # netloc: letters, digits and hyphens only (may be empty)
    (?P<path>\.{0,2}(/.*)?)  # everything else from the first / is part of the path
    """,
    re.VERBOSE | re.IGNORECASE,
)
24+
25+
26+
class SplitB2Result:
    """Result of splitting a B2 URI: a ``(scheme, netloc, path)`` triple.

    The three components are available as read-only properties and the
    object can also be iterated, indexed and measured with ``len()`` like
    the underlying 3-tuple.
    """

    def __init__(self, scheme, netloc, path):
        # Keep the components in a single tuple; the sequence protocol and
        # the named properties below are both thin views over it.
        self._parts = (scheme, netloc, path)

    # --- named access -----------------------------------------------------

    @property
    def scheme(self):
        """First component of the triple."""
        return self._parts[0]

    @property
    def netloc(self):
        """Second component of the triple."""
        return self._parts[1]

    @property
    def path(self):
        """Third component of the triple."""
        return self._parts[2]

    # --- sequence protocol ------------------------------------------------

    def __len__(self):
        return len(self._parts)

    def __getitem__(self, key):
        return self._parts[key]

    def __iter__(self):
        return iter(self._parts)

    # --- helpers ----------------------------------------------------------

    def replace(self, scheme=None, netloc=None, path=None):
        """Return a new result with the given components substituted.

        An argument left as ``None`` keeps the current value of that
        component; any other value (including ``''``) replaces it.
        """
        new_scheme = self.scheme
        if scheme is not None:
            new_scheme = scheme

        new_netloc = self.netloc
        if netloc is not None:
            new_netloc = netloc

        new_path = self.path
        if path is not None:
            new_path = path

        return SplitB2Result(scheme=new_scheme, netloc=new_netloc, path=new_path)

    def __repr__(self):
        return f'SplitB2Result(scheme={self.scheme!r}, netloc={self.netloc!r}, path={self.path!r})'
60+
61+
62+
def b2_urlsplit(url: str) -> SplitB2Result:
    """Split a B2 URI or local path into scheme, netloc and path.

    Before matching, leading control characters and spaces are stripped and
    every tab, carriage return and line feed is removed from the string.
    There is no query or fragment component: everything from the first ``/``
    after the netloc is the path, so ``?`` and ``#`` stay part of it.

    :param url: URI string to split
    :return: split components; ``scheme`` is lower-cased and a missing
             component is returned as an empty string
    :raises ValueError: if the cleaned string does not match the B2 URI grammar
    """
    # clean the url
    url = url.lstrip(_CONTROL_CHARACTERS_AND_SPACE)
    for unsafe in ('\n', '\r', '\t'):
        # str is immutable: replace() returns a new string, so the result
        # has to be assigned back or the character is never removed.
        url = url.replace(unsafe, '')

    match = _B2_URL_RE.fullmatch(url)
    if not match:
        raise ValueError(f'Invalid B2 URI: {url!r}')

    # The scheme group is None when no ``scheme://`` prefix is present.
    scheme = (match.group('scheme') or '').lower()
    netloc = match.group('netloc') or ''
    path = match.group('path') or ''

    return SplitB2Result(scheme, netloc, path)

changelog.d/1090.fixed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Handle file names starting with `/` or ending with `#` or `?` in B2 URIs.

test/unit/_utils/test_uri.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,17 @@ def test_b2fileuri_str():
6161
[
6262
('some/local/path', Path('some/local/path')),
6363
('./some/local/path', Path('some/local/path')),
64+
('.', Path('')),
6465
('b2://bucket', B2URI(bucket_name='bucket')),
6566
('b2://bucket/', B2URI(bucket_name='bucket')),
6667
('b2://bucket/path/to/dir/', B2URI(bucket_name='bucket', path='path/to/dir/')),
6768
('b2id://file123', B2FileIdURI(file_id='file123')),
6869
('b2://bucket/wild[card]', B2URI(bucket_name='bucket', path='wild[card]')),
6970
('b2://bucket/wild?card', B2URI(bucket_name='bucket', path='wild?card')),
7071
('b2://bucket/special#char', B2URI(bucket_name='bucket', path='special#char')),
72+
('b2://bucket/special#', B2URI(bucket_name='bucket', path='special#')),
73+
('b2://bucket/special?', B2URI(bucket_name='bucket', path='special?')),
74+
('b2://bucket//special', B2URI(bucket_name='bucket', path='/special')),
7175
],
7276
)
7377
def test_parse_uri(uri, expected):
@@ -94,11 +98,11 @@ def test_parse_uri__allow_all_buckets():
9498
# Test cases for B2 URIs with credentials
9599
(
96100
'b2://user@password:bucket/path',
97-
'Invalid B2 URI: credentials passed using `user@password:` syntax is not supported in URI',
101+
"Invalid B2 URI: 'b2://user@password:bucket/path'",
98102
),
99103
(
100104
'b2id://user@password:file123',
101-
'Invalid B2 URI: credentials passed using `user@password:` syntax is not supported in URI',
105+
"Invalid B2 URI: 'b2id://user@password:file123'",
102106
),
103107
# Test cases for unsupported URI schemes
104108
('unknown://bucket/path', "Unsupported URI scheme: 'unknown'"),

0 commit comments

Comments
 (0)