Skip to content

Commit 5a4ad6b

Browse files
authored
Normalize RFC 4007 IPv6 Zone IDs
1 parent 0d82fee commit 5a4ad6b

File tree

7 files changed

+56
-8
lines changed

7 files changed

+56
-8
lines changed

src/rfc3986/abnf_regexp.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -115,14 +115,14 @@
115115
UNRESERVED_RE + SUB_DELIMITERS_RE + ':'
116116
)
117117

118-
119118
# RFC 6874 Zone ID ABNF
120119
ZONE_ID = '(?:[' + UNRESERVED_RE + ']|' + PCT_ENCODED + ')+'
121-
IPv6_ADDRZ_RE = IPv6_RE + '%25' + ZONE_ID
122120

123-
IP_LITERAL_RE = r'\[({0}|(?:{1})|{2})\]'.format(
124-
IPv6_RE,
125-
IPv6_ADDRZ_RE,
121+
IPv6_ADDRZ_RFC4007_RE = IPv6_RE + '(?:(?:%25|%)' + ZONE_ID + ')?'
122+
IPv6_ADDRZ_RE = IPv6_RE + '(?:%25' + ZONE_ID + ')?'
123+
124+
IP_LITERAL_RE = r'\[({0}|{1})\]'.format(
125+
IPv6_ADDRZ_RFC4007_RE,
126126
IPv_FUTURE_RE,
127127
)
128128

src/rfc3986/misc.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@
5959

6060

6161
IPv4_MATCHER = re.compile('^' + abnf_regexp.IPv4_RE + '$')
62+
IPv6_MATCHER = re.compile(r'^\[' + abnf_regexp.IPv6_ADDRZ_RFC4007_RE + r'\]$')
63+
64+
# Used by host validator
65+
IPv6_NO_RFC4007_MATCHER = re.compile(r'^\[%s\]$' % (
66+
abnf_regexp.IPv6_ADDRZ_RE
67+
))
6268

6369
# Matcher used to validate path components
6470
PATH_MATCHER = re.compile(abnf_regexp.PATH_RE)

src/rfc3986/normalizers.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,21 @@ def normalize_password(password):
4949

5050
def normalize_host(host):
5151
"""Normalize a host string."""
52+
if misc.IPv6_MATCHER.match(host):
53+
percent = host.find('%')
54+
if percent != -1:
55+
percent_25 = host.find('%25')
56+
57+
# Replace RFC 4007 IPv6 Zone ID delimiter '%' with '%25'
58+
# from RFC 6874. If the host is '[<IPv6 addr>%25]' then we
59+
# assume RFC 4007 and normalize to '[<IPv6 addr>%2525]'
60+
if percent_25 == -1 or percent < percent_25 or \
61+
(percent == percent_25 and percent_25 == len(host) - 4):
62+
host = host.replace('%', '%25', 1)
63+
64+
# Don't normalize the casing of the Zone ID
65+
return host[:percent].lower() + host[percent:]
66+
5267
return host.lower()
5368

5469

src/rfc3986/uri.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,12 @@ def authority_info(self):
171171
# valid bytes, it is an InvalidAuthority.
172172
raise exc.InvalidAuthority(self.authority.encode(self.encoding))
173173

174+
if (host and misc.IPv6_MATCHER.match(host) and not
175+
misc.IPv6_NO_RFC4007_MATCHER.match(host)):
176+
# If it's an IPv6 address that has RFC 4007 IPv6
177+
# Zone IDs then it's invalid.
178+
raise exc.InvalidAuthority(self.authority.encode(self.encoding))
179+
174180
return matches
175181

176182
@property

src/rfc3986/validators.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,8 @@ def authority_is_valid(authority, host=None, require=False):
306306
validated = is_valid(authority, misc.SUBAUTHORITY_MATCHER, require)
307307
if validated and host is not None and misc.IPv4_MATCHER.match(host):
308308
return valid_ipv4_host_address(host)
309+
elif validated and host is not None and misc.IPv6_MATCHER.match(host):
310+
return misc.IPv6_NO_RFC4007_MATCHER.match(host) is not None
309311
return validated
310312

311313

tests/conftest.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
'[::1%25lo]', # With ZoneID
1313
'[FF02:0:0:0:0:0:0:2%25en01]', # With ZoneID
1414
'[FF02:30:0:0:0:0:0:5%25en1]', # With ZoneID
15+
'[FF02:30:0:0:0:0:0:5%25%26]', # With ZoneID
16+
'[FF02:30:0:0:0:0:0:5%2525]', # With ZoneID
1517
'[21DA:D3:0:2F3B:2AA:FF:FE28:9C5A]',
1618
'[FE80::2AA:FF:FE9A:4CA2]',
1719
'[FF02::2]',
@@ -30,9 +32,9 @@
3032
'[FF02::3::5]', # IPv6 can only have one ::
3133
'[FADF:01]', # Not properly compacted (missing a :)
3234
'[FADF:01%en0]', # Not properly compacted (missing a :), Invalid ZoneID
33-
'[FADF::01%en0]', # Invalid ZoneID separator
35+
'[FADF::01%en0]', # ZoneID is per RFC 4007
3436
'[FADF::01%]', # Invalid ZoneID separator and no ZoneID
35-
'[FADF::01%25]', # Missing ZoneID
37+
'[FADF::01%25]', # Missing ZoneID per RFC 6874; ZoneID is '25' per RFC 4007
3638
'localhost:80:80:80', # Too many ports
3739
'256.256.256.256', # Invalid IPv4 Address
3840
SNOWMAN.decode('utf-8')

tests/test_normalizers.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from rfc3986.uri import URIReference
55
from rfc3986.normalizers import (
66
normalize_scheme, normalize_percent_characters,
7-
remove_dot_segments, encode_component,
7+
remove_dot_segments, encode_component, normalize_host
88
)
99

1010

@@ -93,3 +93,20 @@ def test_fragment_normalization():
9393
)
9494
def test_detect_percent_encoded_component(component, encoded_component):
9595
assert encode_component(component, 'utf-8') == encoded_component
96+
97+
98+
@pytest.mark.parametrize(
99+
["host", "normalized_host"],
100+
[
101+
('LOCALHOST', 'localhost'),
102+
('[::1%eth0]', '[::1%25eth0]'),
103+
('[::1%25]', '[::1%2525]'),
104+
('[::1%%25]', '[::1%25%25]'),
105+
('[::1%25%25]', '[::1%25%25]'),
106+
('[::Af%Ff]', '[::af%25Ff]'),
107+
('[::Af%%Ff]', '[::af%25%Ff]'),
108+
('[::Af%25Ff]', '[::af%25Ff]'),
109+
]
110+
)
111+
def test_normalize_host(host, normalized_host):
112+
assert normalize_host(host) == normalized_host

0 commit comments

Comments
 (0)