Skip to content

Commit f98b4b9

Browse files
authored
Enhance URI validation (#257)
This ports the validation logic from protovalidate-go for validating URIs and URI references.
1 parent 5a6c681 commit f98b4b9

File tree

5 files changed

+1015
-160
lines changed

5 files changed

+1015
-160
lines changed

conformance/expected-failures.yaml

Lines changed: 0 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -107,96 +107,6 @@ custom_constraints:
107107
#ERROR: <input>:1:1: expression of type 'int' cannot be range of a comprehension (must be list, map, or dynamic)
108108
# | this.all(e, e == 1)
109109
# | ^
110-
library/is_uri:
111-
- invalid/host/c
112-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://foo@你好.com"}
113-
# want: validation error (1 violation)
114-
# 1. constraint_id: "library.is_uri"
115-
# got: valid
116-
- invalid/host_ipv6/a
117-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://2001:0db8:85a3:0000:0000:8a2e:0370:7334"}
118-
# want: validation error (1 violation)
119-
# 1. constraint_id: "library.is_uri"
120-
# got: valid
121-
- invalid/host_ipv6_zone-id_empty
122-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25]"}
123-
# want: validation error (1 violation)
124-
# 1. constraint_id: "library.is_uri"
125-
# got: valid
126-
- invalid/host_ipv6_zone-id_unquoted
127-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%eth0]"}
128-
# want: validation error (1 violation)
129-
# 1. constraint_id: "library.is_uri"
130-
# got: valid
131-
- invalid/host_reg-name_pct-encoded_invalid_utf8
132-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://foo%c3x%96"}
133-
# want: validation error (1 violation)
134-
# 1. constraint_id: "library.is_uri"
135-
# got: valid
136-
- invalid/port/a
137-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com:8a"}
138-
# want: validation error (1 violation)
139-
# 1. constraint_id: "library.is_uri"
140-
# got: valid
141-
- invalid/port/b
142-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com:x"}
143-
# want: validation error (1 violation)
144-
# 1. constraint_id: "library.is_uri"
145-
# got: valid
146-
- invalid/userinfo_reserved_at
147-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://@@example.com"}
148-
# want: validation error (1 violation)
149-
# 1. constraint_id: "library.is_uri"
150-
# got: valid
151-
- valid/host_ipfuture_exhaust
152-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[vF.-!$&'()*+,;=._~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]"}
153-
# want: valid
154-
# got: validation error (1 violation)
155-
# 1. constraint_id: "library.is_uri"
156-
# message: ""
157-
- valid/host_ipfuture_long
158-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[v1234AF.x]"}
159-
# want: valid
160-
# got: validation error (1 violation)
161-
# 1. constraint_id: "library.is_uri"
162-
# message: ""
163-
- valid/host_ipfuture_short
164-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[v1.x]"}
165-
# want: valid
166-
# got: validation error (1 violation)
167-
# 1. constraint_id: "library.is_uri"
168-
# message: ""
169-
- valid/host_ipv6_zone-id_pct-encoded_ascii
170-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25foo%61%20%23]"}
171-
# want: valid
172-
# got: validation error (1 violation)
173-
# 1. constraint_id: "library.is_uri"
174-
# message: ""
175-
- valid/host_ipv6_zone-id_pct-encoded_utf8
176-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25foo%c3%96]"}
177-
# want: valid
178-
# got: validation error (1 violation)
179-
# 1. constraint_id: "library.is_uri"
180-
# message: ""
181-
- valid/path-empty
182-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:"}
183-
# want: valid
184-
# got: validation error (1 violation)
185-
# 1. constraint_id: "library.is_uri"
186-
# message: ""
187-
library/is_uri_ref:
188-
- valid/empty_string
189-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{}
190-
# want: valid
191-
# got: validation error (1 violation)
192-
# 1. constraint_id: "library.is_uri_ref"
193-
# message: ""
194-
- valid/path-empty
195-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{}
196-
# want: valid
197-
# got: validation error (1 violation)
198-
# 1. constraint_id: "library.is_uri_ref"
199-
# message: ""
200110
standard_constraints/ignore:
201111
- proto/2023/map/ignore_always/invalid/populated
202112
# input: [type.googleapis.com/buf.validate.conformance.cases.EditionsMapIgnoreAlways]:{val:{key:1 value:1}}

src/main/java/build/buf/protovalidate/CustomOverload.java

Lines changed: 45 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@
1515
package build.buf.protovalidate;
1616

1717
import com.google.common.primitives.Bytes;
18-
import java.net.URI;
19-
import java.net.URISyntaxException;
2018
import java.util.HashSet;
2119
import java.util.Locale;
2220
import java.util.Set;
@@ -62,19 +60,19 @@ final class CustomOverload {
6260
*/
6361
static Overload[] create() {
6462
return new Overload[] {
65-
format(),
66-
unique(),
67-
startsWith(),
68-
endsWith(),
69-
contains(),
63+
celFormat(),
64+
celUnique(),
65+
celStartsWith(),
66+
celEndsWith(),
67+
celContains(),
7068
celIsHostname(),
7169
celIsEmail(),
7270
celIsIp(),
7371
celIsIpPrefix(),
7472
celIsUri(),
7573
celIsUriRef(),
76-
isNan(),
77-
isInf(),
74+
celIsNan(),
75+
celIsInf(),
7876
celIsHostAndPort(),
7977
};
8078
}
@@ -84,7 +82,7 @@ static Overload[] create() {
8482
*
8583
* @return The {@link Overload} instance for the "format" operation.
8684
*/
87-
private static Overload format() {
85+
private static Overload celFormat() {
8886
return Overload.binary(
8987
OVERLOAD_FORMAT,
9088
(lhs, rhs) -> {
@@ -106,7 +104,7 @@ private static Overload format() {
106104
*
107105
* @return The {@link Overload} instance for the "unique" operation.
108106
*/
109-
private static Overload unique() {
107+
private static Overload celUnique() {
110108
return Overload.unary(
111109
OVERLOAD_UNIQUE,
112110
(val) -> {
@@ -122,7 +120,7 @@ private static Overload unique() {
122120
*
123121
* @return The {@link Overload} instance for the "startsWith" operation.
124122
*/
125-
private static Overload startsWith() {
123+
private static Overload celStartsWith() {
126124
return Overload.binary(
127125
OVERLOAD_STARTS_WITH,
128126
(lhs, rhs) -> {
@@ -157,7 +155,7 @@ private static Overload startsWith() {
157155
*
158156
* @return The {@link Overload} instance for the "endsWith" operation.
159157
*/
160-
private static Overload endsWith() {
158+
private static Overload celEndsWith() {
161159
return Overload.binary(
162160
OVERLOAD_ENDS_WITH,
163161
(lhs, rhs) -> {
@@ -192,7 +190,7 @@ private static Overload endsWith() {
192190
*
193191
* @return The {@link Overload} instance for the "contains" operation.
194192
*/
195-
private static Overload contains() {
193+
private static Overload celContains() {
196194
return Overload.binary(
197195
OVERLOAD_CONTAINS,
198196
(lhs, rhs) -> {
@@ -262,14 +260,14 @@ private static Overload celIsIp() {
262260
return Err.noSuchOverload(value, OVERLOAD_IS_IP, null);
263261
}
264262
String addr = (String) value.value();
265-
return Types.boolOf(isIP(addr, 0L));
263+
return Types.boolOf(isIp(addr, 0L));
266264
},
267265
(lhs, rhs) -> {
268266
if (lhs.type().typeEnum() != TypeEnum.String || rhs.type().typeEnum() != TypeEnum.Int) {
269267
return Err.noSuchOverload(lhs, OVERLOAD_IS_IP, rhs);
270268
}
271269
String address = (String) lhs.value();
272-
return Types.boolOf(isIP(address, rhs.intValue()));
270+
return Types.boolOf(isIp(address, rhs.intValue()));
273271
},
274272
null);
275273
}
@@ -289,7 +287,7 @@ private static Overload celIsIpPrefix() {
289287
return Err.noSuchOverload(value, OVERLOAD_IS_IP_PREFIX, null);
290288
}
291289
String prefix = (String) value.value();
292-
return Types.boolOf(isIPPrefix(prefix, 0L, false));
290+
return Types.boolOf(isIpPrefix(prefix, 0L, false));
293291
},
294292
(lhs, rhs) -> {
295293
if (lhs.type().typeEnum() != TypeEnum.String
@@ -299,9 +297,9 @@ private static Overload celIsIpPrefix() {
299297
}
300298
String prefix = (String) lhs.value();
301299
if (rhs.type().typeEnum() == TypeEnum.Int) {
302-
return Types.boolOf(isIPPrefix(prefix, rhs.intValue(), false));
300+
return Types.boolOf(isIpPrefix(prefix, rhs.intValue(), false));
303301
}
304-
return Types.boolOf(isIPPrefix(prefix, 0L, rhs.booleanValue()));
302+
return Types.boolOf(isIpPrefix(prefix, 0L, rhs.booleanValue()));
305303
},
306304
(values) -> {
307305
if (values.length != 3
@@ -311,7 +309,7 @@ private static Overload celIsIpPrefix() {
311309
return Err.noSuchOverload(values[0], OVERLOAD_IS_IP_PREFIX, "", values);
312310
}
313311
String prefix = (String) values[0].value();
314-
return Types.boolOf(isIPPrefix(prefix, values[1].intValue(), values[2].booleanValue()));
312+
return Types.boolOf(isIpPrefix(prefix, values[1].intValue(), values[2].booleanValue()));
315313
});
316314
}
317315

@@ -328,10 +326,7 @@ private static Overload celIsUri() {
328326
return Err.noSuchOverload(value, OVERLOAD_IS_URI, null);
329327
}
330328
String addr = (String) value.value();
331-
if (addr.isEmpty()) {
332-
return BoolT.False;
333-
}
334-
return Types.boolOf(validateURI(addr, true));
329+
return Types.boolOf(isUri(addr));
335330
});
336331
}
337332

@@ -348,10 +343,7 @@ private static Overload celIsUriRef() {
348343
return Err.noSuchOverload(value, OVERLOAD_IS_URI_REF, null);
349344
}
350345
String addr = (String) value.value();
351-
if (addr.isEmpty()) {
352-
return BoolT.False;
353-
}
354-
return Types.boolOf(validateURI(addr, false));
346+
return Types.boolOf(isUriRef(addr));
355347
});
356348
}
357349

@@ -360,7 +352,7 @@ private static Overload celIsUriRef() {
360352
*
361353
* @return The {@link Overload} instance for the "isNan" operation.
362354
*/
363-
private static Overload isNan() {
355+
private static Overload celIsNan() {
364356
return Overload.unary(
365357
OVERLOAD_IS_NAN,
366358
value -> {
@@ -377,7 +369,7 @@ private static Overload isNan() {
377369
*
378370
* @return The {@link Overload} instance for the "isInf" operation.
379371
*/
380-
private static Overload isInf() {
372+
private static Overload celIsInf() {
381373
return Overload.overload(
382374
OVERLOAD_IS_INF,
383375
null,
@@ -448,21 +440,21 @@ private static boolean isHostAndPort(String str, boolean portRequired) {
448440

449441
int endPlus = end + 1;
450442
if (endPlus == str.length()) { // no port
451-
return !portRequired && isIP(str.substring(1, end), 6);
443+
return !portRequired && isIp(str.substring(1, end), 6);
452444
} else if (endPlus == splitIdx) { // port
453-
return isIP(str.substring(1, end), 6) && isPort(str.substring(splitIdx + 1));
445+
return isIp(str.substring(1, end), 6) && isPort(str.substring(splitIdx + 1));
454446
}
455447
return false; // malformed
456448
}
457449

458450
if (splitIdx < 0) {
459-
return !portRequired && (isHostname(str) || isIP(str, 4));
451+
return !portRequired && (isHostname(str) || isIp(str, 4));
460452
}
461453

462454
String host = str.substring(0, splitIdx);
463455
String port = str.substring(splitIdx + 1);
464456

465-
return ((isHostname(host) || isIP(host, 4)) && isPort(port));
457+
return ((isHostname(host) || isIp(host, 4)) && isPort(port));
466458
}
467459

468460
// Returns true if the string is a valid port for isHostAndPort.
@@ -606,7 +598,7 @@ private static boolean isHostname(String val) {
606598
* <p>Both formats are well-defined in the internet standard RFC 3986. Zone identifiers for IPv6
607599
* addresses (for example "fe80::a%en1") are supported.
608600
*/
609-
private static boolean isIP(String addr, long ver) {
601+
static boolean isIp(String addr, long ver) {
610602
if (ver == 6L) {
611603
return new Ipv6(addr).address();
612604
} else if (ver == 4L) {
@@ -618,22 +610,24 @@ private static boolean isIP(String addr, long ver) {
618610
}
619611

620612
/**
621-
* Validates if the input string is a valid URI, which can be a URL or a URN.
613+
* Returns true if the string is a URI, for example "https://example.com/foo/bar?baz=quux#frag".
622614
*
623-
* @param val The input string to validate as a URI.
624-
* @param checkAbsolute Whether to check if this URI is absolute (i.e. has a scheme component)
625-
* @return {@code true} if the input string is a valid URI, {@code false} otherwise.
615+
* <p>URI is defined in the internet standard RFC 3986. Zone Identifiers in IPv6 address literals
616+
* are supported (RFC 6874).
626617
*/
627-
private static boolean validateURI(String val, boolean checkAbsolute) {
628-
try {
629-
URI uri = new URI(val);
630-
if (checkAbsolute) {
631-
return uri.isAbsolute();
632-
}
633-
return true;
634-
} catch (URISyntaxException e) {
635-
return false;
636-
}
618+
private static boolean isUri(String str) {
619+
return new Uri(str).uri();
620+
}
621+
622+
/**
623+
* Returns true if the string is a URI Reference - a URI such as
624+
* "https://example.com/foo/bar?baz=quux#frag", or a Relative Reference such as "./foo/bar?query".
625+
*
626+
* <p>URI, URI Reference, and Relative Reference are defined in the internet standard RFC 3986.
627+
* Zone Identifiers in IPv6 address literals are supported (RFC 6874).
628+
*/
629+
private static boolean isUriRef(String str) {
630+
return new Uri(str).uriReference();
637631
}
638632

639633
/**
@@ -653,15 +647,15 @@ private static boolean validateURI(String val, boolean checkAbsolute) {
653647
* <p>The same principle applies to IPv4 addresses. "192.168.1.0/24" designates the first 24 bits
654648
* of the 32-bit IPv4 as the network prefix.
655649
*/
656-
private static boolean isIPPrefix(String str, long version, boolean strict) {
650+
private static boolean isIpPrefix(String str, long version, boolean strict) {
657651
if (version == 6L) {
658652
Ipv6 ip = new Ipv6(str);
659653
return ip.addressPrefix() && (!strict || ip.isPrefixOnly());
660654
} else if (version == 4L) {
661655
Ipv4 ip = new Ipv4(str);
662656
return ip.addressPrefix() && (!strict || ip.isPrefixOnly());
663657
} else if (version == 0L) {
664-
return isIPPrefix(str, 6, strict) || isIPPrefix(str, 4, strict);
658+
return isIpPrefix(str, 6, strict) || isIpPrefix(str, 4, strict);
665659
}
666660
return false;
667661
}

0 commit comments

Comments
 (0)