Skip to content

Commit b2f0b8d

Browse files
lrhnCommit Queue
authored andcommitted
Accept IPvFuture syntax in Uri.
The RFC 3986 grammar for `[...]` addresses accepts IPv6 addresses and IPvFuture addresses, where the latter have the form `'v' <hexDigit>+ '.' (<unreserved>|<sub-delim>|':')+`. This allows the IPvFuture syntax, with no interpretation, as the `host` of a `Uri`. For now, the `Uri(host: ...)` constructor argument only allows IPvFuture addresses that are already wrapped in `[...]` brackets, and the `Uri.host` getter returns IPvFuture addresses including brackets. The `Uri(host:...)` still allows unbracketed IPv6 addresses (distinguished from plain host-names by containing a `:`), and `Uri.host` returns IPv6 addresses without brackets. `Uri.parse` only accepts IPv6 and IPvFuture in brackets. (Only IPv6 can have a zone.) Fixes #60483. CoreLibraryReviewExempt: Local implementation only, no API. Bug: https://dartbug.com/60483 Change-Id: Id369ba1316b34f443edfe5b0f56864c32beddccc Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/421081 Reviewed-by: Nate Bosch <[email protected]> Commit-Queue: Lasse Nielsen <[email protected]>
1 parent af7f639 commit b2f0b8d

File tree

3 files changed

+286
-23
lines changed

3 files changed

+286
-23
lines changed

sdk/lib/core/uri.dart

Lines changed: 175 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ const int _BACKSLASH = 0x5C;
2020
const int _RIGHT_BRACKET = 0x5D;
2121
const int _LOWER_CASE_A = 0x61;
2222
const int _LOWER_CASE_F = 0x66;
23+
const int _LOWER_CASE_V = 0x76;
2324
const int _LOWER_CASE_Z = 0x7A;
2425

2526
const String _hexDigits = "0123456789ABCDEF";
@@ -909,6 +910,19 @@ abstract interface class Uri {
909910
// query = *( pchar / "/" / "?" )
910911
//
911912
// fragment = *( pchar / "/" / "?" )
913+
// IPv6address = 6( h16 ":" ) ls32
914+
// / "::" 5( h16 ":" ) ls32
915+
// / [ h16 ] "::" 4( h16 ":" ) ls32
916+
// / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
917+
// / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
918+
// / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32
919+
// / [ *4( h16 ":" ) h16 ] "::" ls32
920+
// / [ *5( h16 ":" ) h16 ] "::" h16
921+
// / [ *6( h16 ":" ) h16 ] "::"
922+
// ls32 = ( h16 ":" h16 ) / IPv4address
923+
// ; least-significant 32 bits of address
924+
// h16 = 1*4HEXDIG
925+
// ; 16 bits of address represented in hexadecimal
912926
end ??= uri.length;
913927

914928
// Special case data:URIs. Ignore case when testing.
@@ -1439,6 +1453,102 @@ abstract interface class Uri {
14391453
return result;
14401454
}
14411455

1456+
/// Validates that the range [start]..[end] of [host] holds an IPv6 or
/// IPvFuture address.
///
/// The IPv6 format is described at [parseIPv6Address].
///
/// An IPvFuture address has the format:
/// * 'v' \<hexDigit\>+ '.' (\<unreserved\> | \<sub-delims\> | ':')+
///
/// An IPv6 address cannot start with 'v', so the first character alone
/// decides which of the two formats the input is checked against.
/// An IPvFuture address is only validated, never interpreted, not even
/// when its version digit is `4` or `6`.
///
/// Throws a [FormatException] if the range is empty, or if it is not a
/// valid IPv6, IPv6+zone or IPvFuture address.
///
/// Returns `true` if the valid input was an IPv6 address,
/// and `false` if it was an IPvFuture address.
static bool _validateIPvAddress(String host, int start, int end) {
  assert(0 <= start && start <= end && end <= host.length);
  if (start == end) throw FormatException("Empty IP address", host, start);
  if (host.codeUnitAt(start) != _LOWER_CASE_V) {
    // Not IPvFuture, so it must be IPv6.
    // The parse result is discarded, the call is only made because it
    // throws on invalid input.
    // TODO: Have a validator that is not also parsing into bytes.
    parseIPv6Address(host, start, end);
    return true;
  }
  // The IPvFuture validator returns its error instead of throwing it.
  final failure = _validateIPvFutureAddress(host, start, end);
  if (failure != null) throw failure;
  return false;
}
1486+
1487+
/// Validates that [start]..[end] of [host] is an IPvFuture version and
/// address.
///
/// The expected format is:
/// * 'v' \<hexDigit\>+ '.' (\<unreserved\> | \<sub-delims\> | ':')+
///
/// The [start] must be at the leading 'v'.
///
/// Returns a [FormatException] describing the first problem found if the
/// input is not valid, and `null` if it is valid.
/// The exception is returned rather than thrown, so that the caller can
/// decide to throw it.
static FormatException? _validateIPvFutureAddress(
  String host,
  int start,
  int end,
) {
  assert(host.startsWith('v', start));
  // Skip the leading 'v'. From here on, `start` is where the version's
  // hex digits must begin.
  start++;
  var cursor = start;
  // Scan the version: one or more hex digits terminated by a '.'.
  while (true) {
    if (cursor >= end) {
      // Reached the end of the input without finding a '.'.
      // No character was consumed as a terminator here, so no hex digits
      // were seen exactly when `cursor` has not moved from `start`.
      if (cursor == start) {
        return FormatException(
          "Missing hex-digit in IPvFuture address",
          host,
          cursor,
        );
      }
      return FormatException("Missing '.' in IPvFuture address", host, cursor);
    }
    var char = host.codeUnitAt(cursor++);
    // Continue if ASCII digit.
    if (char ^ 0x30 <= 9) continue;
    // Continue if a-f, A-F. Setting bit 0x20 maps an ASCII upper-case
    // letter to its lower-case counterpart.
    var lowerCaseChar = char | 0x20;
    if (lowerCaseChar >= _LOWER_CASE_A && lowerCaseChar <= _LOWER_CASE_F) {
      continue;
    }
    if (char != _DOT) {
      return FormatException("Unexpected character", host, cursor - 1);
    }
    // Found the '.', which `cursor` has already moved past, so the '.'
    // is at `cursor - 1`. Check that at least one hex digit preceded it.
    if (cursor - 1 == start) {
      return FormatException(
        "Missing hex-digit in IPvFuture address",
        host,
        cursor,
      );
    }
    break;
  }
  // The address part after the '.' must contain at least one character.
  if (cursor == end) {
    return FormatException(
      "Missing address in IPvFuture address",
      host,
      cursor,
    );
  }
  // Every remaining character must be a valid IPvFuture address character.
  do {
    var char = host.codeUnitAt(cursor);
    // Reject non-ASCII code units before consulting `_charTables`, so that
    // they are reported as a format error.
    // NOTE(review): assumes `_charTables` only has entries for code units
    // below 128, as the `byte < 128` guard in `UriData` suggests - confirm.
    if (char >= 128 ||
        (_charTables.codeUnitAt(char) & _ipvFutureAddressCharsMask) == 0) {
      return FormatException(
        "Invalid IPvFuture address character",
        host,
        cursor,
      );
    }
  } while (++cursor < end);
  return null;
}
1551+
14421552
/// Parses the [host] as an IP version 6 (IPv6) address.
14431553
///
14441554
/// Returns the address as a list of 16 bytes in network byte order
@@ -1456,6 +1566,36 @@ abstract interface class Uri {
14561566
/// * `3ffe:2a00:100:7031::1`
14571567
/// * `::FFFF:129.144.52.38`
14581568
/// * `2010:836B:4179::836B:4179`
1569+
///
1570+
/// The grammar for IPv6 addresses is:
1571+
/// ```
1572+
/// IPv6address ::= (h16 ":"){6} ls32
1573+
/// | "::" (h16 ":"){5} ls32
1574+
/// | ( h16) "::" (h16 ":"){4} ls32
1575+
/// | ((h16 ":"){0,1} h16) "::" (h16 ":"){3} ls32
1576+
/// | ((h16 ":"){0,2} h16) "::" (h16 ":"){2} ls32
1577+
/// | ((h16 ":"){0,3} h16) "::" h16 ":" ls32
1578+
/// | ((h16 ":"){0,4} h16) "::" ls32
1579+
/// | ((h16 ":"){0,5} h16) "::" h16
1580+
/// | ((h16 ":"){0,6} h16) "::"
1581+
/// ls32 ::= (h16 ":" h16) | IPv4address
1582+
/// ;; least-significant 32 bits of address
1583+
/// h16 ::= HEXDIG{1,4}
1584+
/// ;; 16 bits of address represented in hexadecimal
1585+
/// ```
1586+
/// That is
1587+
/// - eight 1-to-4-digit hexadecimal numerals separated by `:`, or
1588+
/// - one to seven such `:`-separated numerals, where either one pair is
1589+
/// separated by `::`, or a leading or trailing `::`.
1590+
/// - either of the above with a trailing two `:`-separated numerals
1591+
/// replaced by an IPv4 address.
1592+
///
1593+
/// An IPv6 address with a zone ID (from RFC 6874) is an IPv6 address followed
1594+
/// by `%25` (an escaped `%`) and valid zone characters.
1595+
/// ```
1596+
/// IPv6addrz ::= IPv6address "%25" ZoneID
1597+
/// ZoneID ::= (unreserved | pct-encoded)+
1598+
/// ```
14591599
static List<int> parseIPv6Address(String host, [int start = 0, int? end]) {
14601600
end ??= host.length;
14611601
// An IPv6 address consists of exactly 8 parts of 1-4 hex digits, separated
@@ -1782,7 +1922,7 @@ final class _Uri implements _PlatformUri {
17821922
String get host {
17831923
String? host = _host;
17841924
if (host == null) return "";
1785-
if (host.startsWith('[')) {
1925+
if (host.startsWith('[') && !host.startsWith('v', 1)) {
17861926
return host.substring(1, host.length - 1);
17871927
}
17881928
return host;
@@ -1839,7 +1979,7 @@ final class _Uri implements _PlatformUri {
18391979
var hostEnd = hostStart;
18401980
if (hostStart < authority.length &&
18411981
authority.codeUnitAt(hostStart) == _LEFT_BRACKET) {
1842-
// IPv6 host.
1982+
// IPv6 or IPvFuture host.
18431983
int escapeForZoneID = -1;
18441984
for (; hostEnd < authority.length; hostEnd++) {
18451985
int char = authority.codeUnitAt(hostEnd);
@@ -1859,7 +1999,7 @@ final class _Uri implements _PlatformUri {
18591999
hostStart,
18602000
);
18612001
}
1862-
Uri.parseIPv6Address(
2002+
bool isIPv6 = Uri._validateIPvAddress(
18632003
authority,
18642004
hostStart + 1,
18652005
(escapeForZoneID < 0) ? hostEnd : escapeForZoneID,
@@ -2161,33 +2301,42 @@ final class _Uri implements _PlatformUri {
21612301
/// Check and normalize a host name.
21622302
///
21632303
/// If the host name starts and ends with '[' and ']', it is considered an
2164-
/// IPv6 address. If [strictIPv6] is false, the address is also considered
2304+
/// IPv6 address (with or without a zone) or an IPvFuture address.
2305+
/// If [strictIPv6] is false, the address is also considered
21652306
/// an IPv6 address if it contains any ':' character.
21662307
///
2167-
/// If it is not an IPv6 address, it is case- and escape-normalized.
2308+
/// If it is not an IPv6/IPvFuture address, it is case- and escape-normalized.
21682309
/// This escapes all characters not valid in a reg-name,
21692310
/// and converts all non-escape upper-case letters to lower-case.
21702311
static String? _makeHost(String? host, int start, int end, bool strictIPv6) {
21712312
// TODO(lrn): Should we normalize IPv6 addresses according to RFC 5952?
21722313
if (host == null) return null;
21732314
if (start == end) return "";
2174-
// Host is an IPv6 address if it starts with '[' or contains a colon.
2315+
// Host is an IPv6 or IPvFuture address if it starts with '[',
2316+
// or an IPv6 address if it contains a colon.
21752317
if (host.codeUnitAt(start) == _LEFT_BRACKET) {
21762318
if (host.codeUnitAt(end - 1) != _RIGHT_BRACKET) {
21772319
_fail(host, start, 'Missing end `]` to match `[` in host');
21782320
}
21792321
String zoneID = "";
2180-
int index = _checkZoneID(host, start + 1, end - 1);
2181-
if (index < end - 1) {
2182-
int zoneIDstart =
2183-
(host.startsWith("25", index + 1)) ? index + 3 : index + 1;
2184-
zoneID = _normalizeZoneID(host, zoneIDstart, end - 1, "%25");
2322+
int index = end - 1;
2323+
if (host.codeUnitAt(start + 1) != _LOWER_CASE_V) {
2324+
index = _checkZoneID(host, start + 1, end - 1);
2325+
if (index < end - 1) {
2326+
int zoneIDstart =
2327+
(host.startsWith("25", index + 1)) ? index + 3 : index + 1;
2328+
zoneID = _normalizeZoneID(host, zoneIDstart, end - 1, "%25");
2329+
}
21852330
}
2186-
Uri.parseIPv6Address(host, start + 1, index);
2187-
// RFC 5952 requires hex digits to be lower case.
2188-
return host.substring(start, index).toLowerCase() + zoneID + ']';
2331+
bool isIPv6 = Uri._validateIPvAddress(host, start + 1, index);
2332+
var hostChars = host.substring(start + 1, index);
2333+
// RFC 5952 requires IPv6 hex digits to be lower case.
2334+
if (isIPv6) hostChars = hostChars.toLowerCase();
2335+
return '[$hostChars$zoneID]';
21892336
}
21902337
if (!strictIPv6) {
2338+
// IPv6 addresses allowed without `[...]` brackets.
2339+
// Used when called from `Uri` constructor.
21912340
// TODO(lrn): skip if too short to be a valid IPv6 address?
21922341
for (int i = start; i < end; i++) {
21932342
if (host.codeUnitAt(i) == _COLON) {
@@ -2199,7 +2348,7 @@ final class _Uri implements _PlatformUri {
21992348
zoneID = _normalizeZoneID(host, zoneIDstart, end, "%25");
22002349
}
22012350
Uri.parseIPv6Address(host, start, index);
2202-
return '[${host.substring(start, index)}' + zoneID + ']';
2351+
return '[${host.substring(start, index)}$zoneID]';
22032352
}
22042353
}
22052354
}
@@ -4008,8 +4157,7 @@ final class UriData {
40084157
for (int i = 0; i < bytes.length; i++) {
40094158
int byte = bytes[i];
40104159
byteOr |= byte;
4011-
if (byte < 128 &&
4012-
((_charTables.codeUnitAt(byte) & canonicalMask) != 0)) {
4160+
if (byte < 128 && ((_charTables.codeUnitAt(byte) & canonicalMask) != 0)) {
40134161
buffer.writeCharCode(byte);
40144162
} else {
40154163
buffer.writeCharCode(_PERCENT);
@@ -4029,7 +4177,6 @@ final class UriData {
40294177

40304178
String toString() =>
40314179
(_separatorIndices[0] == _noScheme) ? "data:$_text" : _text;
4032-
40334180
}
40344181

40354182
// --- URI PARSER TABLE --- start --- generated code, do not edit ---
@@ -4097,7 +4244,8 @@ const int _schemeStart = 20;
40974244
/// which maps the range U+0020 .. U+007F into positions 0 .. 95.
40984245
/// All remaining characters are mapped to position 0x1f (`0x7f ^ 0x60`), which
40994246
/// represents the transition for all remaining characters.
4100-
const String _scannerTables = "\xE1\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
4247+
const String _scannerTables =
4248+
"\xE1\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
41014249
"\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xE1\xE1\xE1"
41024250
"\x01\xE1\xE1\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01"
41034251
"\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\x01\xE1\xE3\xE1\xE1\x01\xE1\x01"
@@ -4897,7 +5045,6 @@ bool _caseInsensitiveEquals(String string1, String string2) =>
48975045
string1.length == string2.length &&
48985046
_caseInsensitiveStartsWith(string1, string2, 0);
48995047

4900-
49015048
// --- URI CHARSET TABLE --- start --- generated code, do not edit ---
49025049
// Use tools/generate_uri_parser_tables.dart to generate this code
49035050
// if necessary.
@@ -4927,7 +5074,7 @@ const _userinfoMask = 0x0010;
49275074
// Characters allowed in the reg-name as of RFC 3986.
49285075
// RFC 3986 Appendix A
49295076
// reg-name = *( unreserved / pct-encoded / sub-delims )
4930-
// Same as `_userInfoMask` without the `:`.
5077+
// Same as `_userinfoMask` without the `:`.
49315078
// // [A-Za-z0-9!$%&'()*+,\-.;=_~] (including '%')
49325079
const _regNameMask = 0x0020;
49335080

@@ -4979,7 +5126,13 @@ const _uricMask = _queryCharMask;
49795126
// [:/?#[]@]
49805127
const _genDelimitersMask = 0x0400;
49815128

4982-
const String _charTables = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
5129+
// Characters valid in an IPvFuture address, RFC 3986 section 3.2.2.
5130+
// 1*( unreserved / sub-delims / ":" )
5131+
// [A-Za-z0-9\-._~]|[!$&'()*+,;=]|:
5132+
const _ipvFutureAddressCharsMask = _userinfoMask;
5133+
5134+
const String _charTables =
5135+
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
49835136
"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
49845137
"\x00\x00\x00\u03f6\x00\u0404\u03f4\x20\u03f4\u03f6\u01f6\u01f6\u03f6\u03fc"
49855138
"\u01f4\u03ff\u03ff\u0584\u03ff\u03ff\u03ff\u03ff\u03ff\u03ff\u03ff\u03ff"

0 commit comments

Comments
 (0)