Skip to content

Commit f4b84c2

Browse files
authored
Stop validating Zone IDs before normalization (#45)
1 parent 5a4ad6b commit f4b84c2

File tree

7 files changed

+65
-21
lines changed

7 files changed

+65
-21
lines changed

src/rfc3986/misc.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
abnf_regexp.PORT_RE))
5959

6060

61+
HOST_MATCHER = re.compile('^' + abnf_regexp.HOST_RE + '$')
6162
IPv4_MATCHER = re.compile('^' + abnf_regexp.IPv4_RE + '$')
6263
IPv6_MATCHER = re.compile(r'^\[' + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + r'\]$')
6364

src/rfc3986/uri.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,12 +171,6 @@ def authority_info(self):
171171
# valid bytes, it is an InvalidAuthority.
172172
raise exc.InvalidAuthority(self.authority.encode(self.encoding))
173173

174-
if (host and misc.IPv6_MATCHER.match(host) and not
175-
misc.IPv6_NO_RFC4007_MATCHER.match(host)):
176-
# If it's an IPv6 address that has RFC 4007 IPv6
177-
# Zone IDs then it's invalid.
178-
raise exc.InvalidAuthority(self.authority.encode(self.encoding))
179-
180174
return matches
181175

182176
@property

src/rfc3986/validators.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,24 @@ def authority_is_valid(authority, host=None, require=False):
304304
bool
305305
"""
306306
validated = is_valid(authority, misc.SUBAUTHORITY_MATCHER, require)
307+
if validated and host is not None:
308+
return host_is_valid(host, require)
309+
return validated
310+
311+
312+
def host_is_valid(host, require=False):
313+
"""Determine if the host string is valid.
314+
315+
:param str host:
316+
The host to validate.
317+
:param bool require:
318+
(optional) Specify if host must not be None.
319+
:returns:
320+
``True`` if valid, ``False`` otherwise
321+
:rtype:
322+
bool
323+
"""
324+
validated = is_valid(host, misc.HOST_MATCHER, require)
307325
if validated and host is not None and misc.IPv4_MATCHER.match(host):
308326
return valid_ipv4_host_address(host)
309327
elif validated and host is not None and misc.IPv6_MATCHER.match(host):
@@ -397,7 +415,9 @@ def subauthority_component_is_valid(uri, component):
397415

398416
# If we can parse the authority into sub-components and we're not
399417
# validating the port, we can assume it's valid.
400-
if component != 'port':
418+
if component == 'host':
419+
return host_is_valid(subauthority_dict['host'], require=True)
420+
elif component != 'port':
401421
return True
402422

403423
try:

tests/conftest.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,7 @@
3232
'[FF02::3::5]', # IPv6 can only have one ::
3333
'[FADF:01]', # Not properly compacted (missing a :)
3434
'[FADF:01%en0]', # Not properly compacted (missing a :), Invalid ZoneID
35-
'[FADF::01%en0]', # ZoneID is per RFC 4007
36-
'[FADF::01%]', # Invalid ZoneID separator and no ZoneID
37-
'[FADF::01%25]', # Missing ZoneID in RFC 6974, is 25 in RFC 4007
35+
'[FADF::01%]', # Empty Zone ID
3836
'localhost:80:80:80', # Too many ports
3937
'256.256.256.256', # Invalid IPv4 Address
4038
SNOWMAN.decode('utf-8')

tests/test_builder.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,16 +67,22 @@ def test_add_credentials_requires_username():
6767
builder.URIBuilder().add_credentials(None, None)
6868

6969

70-
@pytest.mark.parametrize('hostname', [
71-
'google.com',
72-
'GOOGLE.COM',
73-
'gOOgLe.COM',
74-
'goOgLE.com',
75-
])
76-
def test_add_host(hostname):
70+
@pytest.mark.parametrize(
71+
['hostname', 'expected_hostname'],
72+
[
73+
('google.com', 'google.com'),
74+
('GOOGLE.COM', 'google.com'),
75+
('gOOgLe.COM', 'google.com'),
76+
('goOgLE.com', 'google.com'),
77+
('[::ff%etH0]', '[::ff%25etH0]'),
78+
('[::ff%25etH0]', '[::ff%25etH0]'),
79+
('[::FF%etH0]', '[::ff%25etH0]'),
80+
]
81+
)
82+
def test_add_host(hostname, expected_hostname):
7783
"""Verify we normalize hostnames in add_host."""
7884
uribuilder = builder.URIBuilder().add_host(hostname)
79-
assert uribuilder.host == 'google.com'
85+
assert uribuilder.host == expected_hostname
8086

8187

8288
@pytest.mark.parametrize('port', [

tests/test_normalizers.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,17 @@ def test_hostname_normalization():
6767
URIReference(None, 'example.com', None, None, None))
6868

6969

70-
def test_authority_normalization():
70+
@pytest.mark.parametrize(
71+
['authority', 'expected_authority'],
72+
[
73+
('user%2aName@EXAMPLE.COM', 'user%2AName@example.com'),
74+
('[::1%eth0]', '[::1%25eth0]')
75+
]
76+
)
77+
def test_authority_normalization(authority, expected_authority):
7178
uri = URIReference(
72-
None, 'user%2aName@EXAMPLE.COM', None, None, None).normalize()
73-
assert uri.authority == 'user%2AName@example.com'
79+
None, authority, None, None, None).normalize()
80+
assert uri.authority == expected_authority
7481

7582

7683
def test_fragment_normalization():

tests/test_validators.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,3 +235,21 @@ def test_invalid_uri_with_invalid_path(invalid_uri):
235235
validators.Validator().check_validity_of(
236236
'host', 'path',
237237
).validate(uri)
238+
239+
240+
def test_validating_rfc_4007_ipv6_zone_ids():
241+
"""Verify that RFC 4007 IPv6 Zone IDs are invalid
242+
host/authority but after normalization are valid
243+
"""
244+
uri = rfc3986.uri_reference("http://[::1%eth0]")
245+
with pytest.raises(exceptions.InvalidComponentsError):
246+
validators.Validator().check_validity_of(
247+
'host'
248+
).validate(uri)
249+
250+
uri = uri.normalize()
251+
assert uri.host == '[::1%25eth0]'
252+
253+
validators.Validator().check_validity_of(
254+
'host'
255+
).validate(uri)

0 commit comments

Comments
 (0)