Skip to content

Commit 5d5b5d3

Browse files
Redesign url parsing to handle values with leading / and trailing special chars
* Replace `urlsplit` with a new parser
* Replace class `SplitB2Result` with a namedtuple
* Remove `uriparse.b2_urlsplit` and simplify the code
* Handle path in `parse_uri` instead of `_parse_b2_uri`
* Move URI cleaning into `_clean_uri`
1 parent 363c3cd commit 5d5b5d3

File tree

4 files changed

+47
-28
lines changed

4 files changed

+47
-28
lines changed

b2/_internal/_utils/uri.py

Lines changed: 37 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010
from __future__ import annotations
1111

1212
import dataclasses
13-
import pathlib
14-
import urllib.parse
13+
import re
1514
from functools import singledispatchmethod
1615
from pathlib import Path
1716
from typing import Sequence
@@ -24,7 +23,10 @@
2423
)
2524
from b2sdk.v3.exception import B2Error
2625

27-
from b2._internal._utils.python_compat import removeprefix
26+
_B2ID_PATTERN = re.compile(r'^b2id://(?P<file_id>[a-zA-Z0-9:_-]+)$', re.IGNORECASE)
27+
_B2_PATTERN = re.compile(r'^b2://(?P<bucket>[a-z0-9-]*)(?P<path>/.*)?$', re.IGNORECASE)
28+
_SCHEME_PATTERN = re.compile(r'(?P<scheme>[a-z0-9]*)://.*', re.IGNORECASE)
29+
_CONTROL_CHARACTERS_AND_SPACE = '\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
2830

2931

3032
class B2URIBase:
@@ -92,10 +94,10 @@ def parse_uri(uri: str, *, allow_all_buckets: bool = False) -> Path | B2URI | B2
9294
"""
9395
if not uri:
9496
raise ValueError('URI cannot be empty')
95-
parsed = urllib.parse.urlsplit(uri)
96-
if parsed.scheme == '':
97-
return pathlib.Path(uri)
98-
return _parse_b2_uri(uri, parsed, allow_all_buckets=allow_all_buckets)
97+
98+
if _SCHEME_PATTERN.fullmatch(_clean_uri(uri)):
99+
return _parse_b2_uri(uri, allow_all_buckets=allow_all_buckets)
100+
return Path(uri)
99101

100102

101103
def parse_b2_uri(
@@ -110,38 +112,48 @@ def parse_b2_uri(
110112
:return: B2 URI
111113
:raises ValueError: if the URI is invalid
112114
"""
113-
parsed = urllib.parse.urlsplit(uri)
114-
return _parse_b2_uri(uri, parsed, allow_all_buckets=allow_all_buckets, allow_b2id=allow_b2id)
115+
return _parse_b2_uri(uri, allow_all_buckets=allow_all_buckets, allow_b2id=allow_b2id)
116+
117+
118+
def _clean_uri(uri: str) -> str:
119+
# Clean URI
120+
uri = uri.lstrip(_CONTROL_CHARACTERS_AND_SPACE)
121+
for i in ('\n', '\r', '\t'):
122+
uri = uri.replace(i, '')
123+
return uri
115124

116125

117126
def _parse_b2_uri(
118127
uri,
119-
parsed: urllib.parse.SplitResult,
120128
*,
121129
allow_all_buckets: bool = False,
122130
allow_b2id: bool = True,
123131
) -> B2URI | B2FileIdURI:
124-
if parsed.scheme in ('b2', 'b2id'):
125-
path = urllib.parse.urlunsplit(parsed._replace(scheme='', netloc=''))
126-
if not parsed.netloc:
132+
uri = _clean_uri(uri)
133+
if uri.lower().startswith('b2://'):
134+
match = _B2_PATTERN.fullmatch(uri)
135+
if not match:
136+
raise ValueError(f'Invalid B2 URI: {uri!r}')
137+
138+
bucket = match.group('bucket')
139+
path = match.group('path')
140+
if not bucket:
127141
if allow_all_buckets:
128142
if path:
129143
raise ValueError(
130144
f"Invalid B2 URI: all buckets URI doesn't allow non-empty path, but {path!r} was provided"
131145
)
132146
return B2URI(bucket_name='')
133-
raise ValueError(f'Invalid B2 URI: {uri!r}')
134-
elif parsed.password or parsed.username:
135-
raise ValueError(
136-
'Invalid B2 URI: credentials passed using `user@password:` syntax is not supported in URI'
137-
)
138-
139-
if parsed.scheme == 'b2':
140-
return B2URI(bucket_name=parsed.netloc, path=removeprefix(path, '/'))
141-
elif parsed.scheme == 'b2id' and allow_b2id:
142-
return B2FileIdURI(file_id=parsed.netloc)
143-
else:
144-
raise ValueError(f'Unsupported URI scheme: {parsed.scheme!r}')
147+
else:
148+
return B2URI(bucket_name=bucket, path=path[1:] if path else '')
149+
elif allow_b2id and uri.lower().startswith('b2id://'):
150+
match = _B2ID_PATTERN.fullmatch(uri)
151+
if match:
152+
return B2FileIdURI(file_id=match.group('file_id'))
153+
elif match := _SCHEME_PATTERN.fullmatch(uri):
154+
raise ValueError(f'Unsupported URI scheme: {match.group("scheme")!r}')
155+
156+
raise ValueError(f'Invalid B2 URI: {uri!r}')
145157

146158

147159
class B2URIAdapter:

changelog.d/1090.fixed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Handle filenames starting with / or ending with # or ?.

test/unit/_utils/test_uri.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,18 @@ def test_b2fileuri_str():
6161
[
6262
('some/local/path', Path('some/local/path')),
6363
('./some/local/path', Path('some/local/path')),
64+
('.', Path('')),
6465
('b2://bucket', B2URI(bucket_name='bucket')),
66+
(' b2://bucket', B2URI(bucket_name='bucket')),
6567
('b2://bucket/', B2URI(bucket_name='bucket')),
6668
('b2://bucket/path/to/dir/', B2URI(bucket_name='bucket', path='path/to/dir/')),
6769
('b2id://file123', B2FileIdURI(file_id='file123')),
6870
('b2://bucket/wild[card]', B2URI(bucket_name='bucket', path='wild[card]')),
6971
('b2://bucket/wild?card', B2URI(bucket_name='bucket', path='wild?card')),
7072
('b2://bucket/special#char', B2URI(bucket_name='bucket', path='special#char')),
73+
('b2://bucket/special#', B2URI(bucket_name='bucket', path='special#')),
74+
('b2://bucket/special?', B2URI(bucket_name='bucket', path='special?')),
75+
('b2://bucket//special', B2URI(bucket_name='bucket', path='/special')),
7176
],
7277
)
7378
def test_parse_uri(uri, expected):
@@ -94,14 +99,15 @@ def test_parse_uri__allow_all_buckets():
9499
# Test cases for B2 URIs with credentials
95100
(
96101
'b2://user@password:bucket/path',
97-
'Invalid B2 URI: credentials passed using `user@password:` syntax is not supported in URI',
102+
"Invalid B2 URI: 'b2://user@password:bucket/path'",
98103
),
99104
(
100105
'b2id://user@password:file123',
101-
'Invalid B2 URI: credentials passed using `user@password:` syntax is not supported in URI',
106+
"Invalid B2 URI: 'b2id://user@password:file123'",
102107
),
103108
# Test cases for unsupported URI schemes
104109
('unknown://bucket/path', "Unsupported URI scheme: 'unknown'"),
110+
(' unknown://bucket/path', "Unsupported URI scheme: 'unknown'"),
105111
],
106112
)
107113
def test_parse_uri_exceptions(uri, expected_exception_message):

test/unit/console_tool/test_download_file.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def test_cat__b2_uri__invalid(b2_cli, capfd):
187187
expected_stderr=None,
188188
expected_status=2,
189189
)
190-
assert "argument B2_URI: Unsupported URI scheme: ''" in capfd.readouterr().err
190+
assert "argument B2_URI: Invalid B2 URI: 'nothing/meaningful'" in capfd.readouterr().err
191191

192192

193193
def test_cat__b2_uri__not_a_file(b2_cli, bucket, capfd):

0 commit comments

Comments
 (0)