diff --git a/conformance/expected-failures.yaml b/conformance/expected-failures.yaml index 5d92bdca..0db100ef 100644 --- a/conformance/expected-failures.yaml +++ b/conformance/expected-failures.yaml @@ -107,96 +107,6 @@ custom_constraints: #ERROR: :1:1: expression of type 'int' cannot be range of a comprehension (must be list, map, or dynamic) # | this.all(e, e == 1) # | ^ -library/is_uri: - - invalid/host/c - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://foo@你好.com"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: valid - - invalid/host_ipv6/a - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://2001:0db8:85a3:0000:0000:8a2e:0370:7334"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: valid - - invalid/host_ipv6_zone-id_empty - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25]"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: valid - - invalid/host_ipv6_zone-id_unquoted - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%eth0]"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: valid - - invalid/host_reg-name_pct-encoded_invalid_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://foo%c3x%96"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: valid - - invalid/port/a - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com:8a"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: valid - - invalid/port/b - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com:x"} - # want: validation error (1 violation) - # 1. 
constraint_id: "library.is_uri" - # got: valid - - invalid/userinfo_reserved_at - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://@@example.com"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: valid - - valid/host_ipfuture_exhaust - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[vF.-!$&'()*+,;=._~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - - valid/host_ipfuture_long - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[v1234AF.x]"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - - valid/host_ipfuture_short - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[v1.x]"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - - valid/host_ipv6_zone-id_pct-encoded_ascii - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25foo%61%20%23]"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - - valid/host_ipv6_zone-id_pct-encoded_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25foo%c3%96]"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - - valid/path-empty - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" -library/is_uri_ref: - - valid/empty_string - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{} - # want: valid - # got: validation error (1 violation) - # 1. 
constraint_id: "library.is_uri_ref" - # message: "" - - valid/path-empty - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" standard_constraints/ignore: - proto/2023/map/ignore_always/invalid/populated # input: [type.googleapis.com/buf.validate.conformance.cases.EditionsMapIgnoreAlways]:{val:{key:1 value:1}} diff --git a/src/main/java/build/buf/protovalidate/CustomOverload.java b/src/main/java/build/buf/protovalidate/CustomOverload.java index 889b0f78..94cc4819 100644 --- a/src/main/java/build/buf/protovalidate/CustomOverload.java +++ b/src/main/java/build/buf/protovalidate/CustomOverload.java @@ -15,8 +15,6 @@ package build.buf.protovalidate; import com.google.common.primitives.Bytes; -import java.net.URI; -import java.net.URISyntaxException; import java.util.HashSet; import java.util.Locale; import java.util.Set; @@ -62,19 +60,19 @@ final class CustomOverload { */ static Overload[] create() { return new Overload[] { - format(), - unique(), - startsWith(), - endsWith(), - contains(), + celFormat(), + celUnique(), + celStartsWith(), + celEndsWith(), + celContains(), celIsHostname(), celIsEmail(), celIsIp(), celIsIpPrefix(), celIsUri(), celIsUriRef(), - isNan(), - isInf(), + celIsNan(), + celIsInf(), celIsHostAndPort(), }; } @@ -84,7 +82,7 @@ static Overload[] create() { * * @return The {@link Overload} instance for the "format" operation. */ - private static Overload format() { + private static Overload celFormat() { return Overload.binary( OVERLOAD_FORMAT, (lhs, rhs) -> { @@ -106,7 +104,7 @@ private static Overload format() { * * @return The {@link Overload} instance for the "unique" operation. 
*/ - private static Overload unique() { + private static Overload celUnique() { return Overload.unary( OVERLOAD_UNIQUE, (val) -> { @@ -122,7 +120,7 @@ private static Overload unique() { * * @return The {@link Overload} instance for the "startsWith" operation. */ - private static Overload startsWith() { + private static Overload celStartsWith() { return Overload.binary( OVERLOAD_STARTS_WITH, (lhs, rhs) -> { @@ -157,7 +155,7 @@ private static Overload startsWith() { * * @return The {@link Overload} instance for the "endsWith" operation. */ - private static Overload endsWith() { + private static Overload celEndsWith() { return Overload.binary( OVERLOAD_ENDS_WITH, (lhs, rhs) -> { @@ -192,7 +190,7 @@ private static Overload endsWith() { * * @return The {@link Overload} instance for the "contains" operation. */ - private static Overload contains() { + private static Overload celContains() { return Overload.binary( OVERLOAD_CONTAINS, (lhs, rhs) -> { @@ -262,14 +260,14 @@ private static Overload celIsIp() { return Err.noSuchOverload(value, OVERLOAD_IS_IP, null); } String addr = (String) value.value(); - return Types.boolOf(isIP(addr, 0L)); + return Types.boolOf(isIp(addr, 0L)); }, (lhs, rhs) -> { if (lhs.type().typeEnum() != TypeEnum.String || rhs.type().typeEnum() != TypeEnum.Int) { return Err.noSuchOverload(lhs, OVERLOAD_IS_IP, rhs); } String address = (String) lhs.value(); - return Types.boolOf(isIP(address, rhs.intValue())); + return Types.boolOf(isIp(address, rhs.intValue())); }, null); } @@ -289,7 +287,7 @@ private static Overload celIsIpPrefix() { return Err.noSuchOverload(value, OVERLOAD_IS_IP_PREFIX, null); } String prefix = (String) value.value(); - return Types.boolOf(isIPPrefix(prefix, 0L, false)); + return Types.boolOf(isIpPrefix(prefix, 0L, false)); }, (lhs, rhs) -> { if (lhs.type().typeEnum() != TypeEnum.String @@ -299,9 +297,9 @@ private static Overload celIsIpPrefix() { } String prefix = (String) lhs.value(); if (rhs.type().typeEnum() == TypeEnum.Int) { - 
return Types.boolOf(isIPPrefix(prefix, rhs.intValue(), false)); + return Types.boolOf(isIpPrefix(prefix, rhs.intValue(), false)); } - return Types.boolOf(isIPPrefix(prefix, 0L, rhs.booleanValue())); + return Types.boolOf(isIpPrefix(prefix, 0L, rhs.booleanValue())); }, (values) -> { if (values.length != 3 @@ -311,7 +309,7 @@ private static Overload celIsIpPrefix() { return Err.noSuchOverload(values[0], OVERLOAD_IS_IP_PREFIX, "", values); } String prefix = (String) values[0].value(); - return Types.boolOf(isIPPrefix(prefix, values[1].intValue(), values[2].booleanValue())); + return Types.boolOf(isIpPrefix(prefix, values[1].intValue(), values[2].booleanValue())); }); } @@ -328,10 +326,7 @@ private static Overload celIsUri() { return Err.noSuchOverload(value, OVERLOAD_IS_URI, null); } String addr = (String) value.value(); - if (addr.isEmpty()) { - return BoolT.False; - } - return Types.boolOf(validateURI(addr, true)); + return Types.boolOf(isUri(addr)); }); } @@ -348,10 +343,7 @@ private static Overload celIsUriRef() { return Err.noSuchOverload(value, OVERLOAD_IS_URI_REF, null); } String addr = (String) value.value(); - if (addr.isEmpty()) { - return BoolT.False; - } - return Types.boolOf(validateURI(addr, false)); + return Types.boolOf(isUriRef(addr)); }); } @@ -360,7 +352,7 @@ private static Overload celIsUriRef() { * * @return The {@link Overload} instance for the "isNan" operation. */ - private static Overload isNan() { + private static Overload celIsNan() { return Overload.unary( OVERLOAD_IS_NAN, value -> { @@ -377,7 +369,7 @@ private static Overload isNan() { * * @return The {@link Overload} instance for the "isInf" operation. 
*/ - private static Overload isInf() { + private static Overload celIsInf() { return Overload.overload( OVERLOAD_IS_INF, null, @@ -448,21 +440,21 @@ private static boolean isHostAndPort(String str, boolean portRequired) { int endPlus = end + 1; if (endPlus == str.length()) { // no port - return !portRequired && isIP(str.substring(1, end), 6); + return !portRequired && isIp(str.substring(1, end), 6); } else if (endPlus == splitIdx) { // port - return isIP(str.substring(1, end), 6) && isPort(str.substring(splitIdx + 1)); + return isIp(str.substring(1, end), 6) && isPort(str.substring(splitIdx + 1)); } return false; // malformed } if (splitIdx < 0) { - return !portRequired && (isHostname(str) || isIP(str, 4)); + return !portRequired && (isHostname(str) || isIp(str, 4)); } String host = str.substring(0, splitIdx); String port = str.substring(splitIdx + 1); - return ((isHostname(host) || isIP(host, 4)) && isPort(port)); + return ((isHostname(host) || isIp(host, 4)) && isPort(port)); } // Returns true if the string is a valid port for isHostAndPort. @@ -606,7 +598,7 @@ private static boolean isHostname(String val) { *

Both formats are well-defined in the internet standard RFC 3986. Zone identifiers for IPv6 * addresses (for example "fe80::a%en1") are supported. */ - private static boolean isIP(String addr, long ver) { + static boolean isIp(String addr, long ver) { if (ver == 6L) { return new Ipv6(addr).address(); } else if (ver == 4L) { @@ -618,22 +610,24 @@ private static boolean isIP(String addr, long ver) { } /** - * Validates if the input string is a valid URI, which can be a URL or a URN. + * Returns true if the string is a URI, for example "https://example.com/foo/bar?baz=quux#frag". * - * @param val The input string to validate as a URI. - * @param checkAbsolute Whether to check if this URI is absolute (i.e. has a scheme component) - * @return {@code true} if the input string is a valid URI, {@code false} otherwise. + *

URI is defined in the internet standard RFC 3986. Zone Identifiers in IPv6 address literals + * are supported (RFC 6874). */ - private static boolean validateURI(String val, boolean checkAbsolute) { - try { - URI uri = new URI(val); - if (checkAbsolute) { - return uri.isAbsolute(); - } - return true; - } catch (URISyntaxException e) { - return false; - } + private static boolean isUri(String str) { + return new Uri(str).uri(); + } + + /** + * Returns true if the string is a URI Reference - a URI such as + * "https://example.com/foo/bar?baz=quux#frag", or a Relative Reference such as "./foo/bar?query". + * + *

URI, URI Reference, and Relative Reference are defined in the internet standard RFC 3986. + * Zone Identifiers in IPv6 address literals are supported (RFC 6874). + */ + private static boolean isUriRef(String str) { + return new Uri(str).uriReference(); } /** @@ -653,7 +647,7 @@ private static boolean validateURI(String val, boolean checkAbsolute) { *

The same principle applies to IPv4 addresses. "192.168.1.0/24" designates the first 24 bits * of the 32-bit IPv4 as the network prefix. */ - private static boolean isIPPrefix(String str, long version, boolean strict) { + private static boolean isIpPrefix(String str, long version, boolean strict) { if (version == 6L) { Ipv6 ip = new Ipv6(str); return ip.addressPrefix() && (!strict || ip.isPrefixOnly()); @@ -661,7 +655,7 @@ private static boolean isIPPrefix(String str, long version, boolean strict) { Ipv4 ip = new Ipv4(str); return ip.addressPrefix() && (!strict || ip.isPrefixOnly()); } else if (version == 0L) { - return isIPPrefix(str, 6, strict) || isIPPrefix(str, 4, strict); + return isIpPrefix(str, 6, strict) || isIpPrefix(str, 4, strict); } return false; } diff --git a/src/main/java/build/buf/protovalidate/Ipv4.java b/src/main/java/build/buf/protovalidate/Ipv4.java index 52f249a3..c3726b64 100644 --- a/src/main/java/build/buf/protovalidate/Ipv4.java +++ b/src/main/java/build/buf/protovalidate/Ipv4.java @@ -17,6 +17,9 @@ import java.util.ArrayList; import java.util.List; +/** + * Ipv4 is a class used to parse a given string to determine if it is an IPv4 address or address prefix. + */ final class Ipv4 { private String str; private int index; @@ -76,7 +79,7 @@ boolean addressPrefix() { && this.index == this.str.length(); } - // Stores value in `prefixLen` + // Store value in prefixLen private boolean prefixLength() { int start = this.index; @@ -169,9 +172,9 @@ private boolean decOctet() { } /** - * Reports whether the current position is a digit. + * Determines whether the current position is a digit. * - *

Method parses the rule: + *

Parses the rule: * *

DIGIT = %x30-39 ; 0-9
    */
@@ -185,9 +188,7 @@ private boolean digit() {
   }
 
   /**
-   * Take the given char at the current index.
-   *
-   * 

If char is at the current index, increment the index. + * Take the given char at the current position, incrementing the index if necessary. */ private boolean take(char c) { if (this.index >= this.str.length()) { diff --git a/src/main/java/build/buf/protovalidate/Ipv6.java b/src/main/java/build/buf/protovalidate/Ipv6.java index aa178943..db14eb68 100644 --- a/src/main/java/build/buf/protovalidate/Ipv6.java +++ b/src/main/java/build/buf/protovalidate/Ipv6.java @@ -18,6 +18,9 @@ import java.util.List; import javax.annotation.Nullable; +/** + * Ipv6 is a class used to parse a given string to determine if it is an IPv6 address or address prefix. + */ final class Ipv6 { private String str; private int index; @@ -121,7 +124,7 @@ boolean addressPrefix() { && this.index == this.str.length(); } - // Stores value in `prefixLen` + // Stores value in prefixLen private boolean prefixLength() { int start = this.index; @@ -159,7 +162,7 @@ private boolean prefixLength() { } } - // Stores dotted notation for right-most 32 bits in `dottedRaw` / `dottedAddr` if found. + // Stores dotted notation for right-most 32 bits in dottedRaw / dottedAddr if found. private boolean addressPart() { while (this.index < this.str.length()) { // dotted notation for right-most 32 bits, e.g. 0:0:0:0:0:ffff:192.1.56.10 @@ -227,9 +230,9 @@ private boolean zoneID() { } /** - * Determines whether string contains a dotted address. + * Determines whether the current position is a dotted address. * - *

Method parses the rule: + *

Parses the rule: * *

1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
    *
@@ -254,9 +257,9 @@ private boolean dotted() {
   }
 
   /**
-   * Determine whether string contains an h16.
+   * Determines whether the current position is an h16.
    *
-   * 

Method parses the rule: + *

Parses the rule: * *

h16 = 1*4HEXDIG
    *
@@ -291,9 +294,9 @@ private boolean h16() {
   }
 
   /**
-   * Reports whether the current position is a hex digit.
+   * Determines whether the current position is a hex digit.
    *
-   * 

Method parses the rule: + *

Parses the rule: * *

HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
    */
@@ -310,9 +313,9 @@ private boolean hexDig() {
   }
 
   /**
-   * Reports whether the current position is a digit.
+   * Determines whether the current position is a digit.
    *
-   * 

Method parses the rule: + *

Parses the rule: * *

DIGIT = %x30-39 ; 0-9
    */
@@ -326,9 +329,7 @@ private boolean digit() {
   }
 
   /**
-   * Take the given char at the current index.
-   *
-   * 

If char is at the current index, increment the index. + * Take the given char at the current position, incrementing the index if necessary. */ private boolean take(char c) { if (this.index >= this.str.length()) { diff --git a/src/main/java/build/buf/protovalidate/Uri.java b/src/main/java/build/buf/protovalidate/Uri.java new file mode 100644 index 00000000..63287135 --- /dev/null +++ b/src/main/java/build/buf/protovalidate/Uri.java @@ -0,0 +1,949 @@ +// Copyright 2023-2024 Buf Technologies, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package build.buf.protovalidate; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; +import java.nio.charset.StandardCharsets; + +/** + * Ipv6 is a class used to parse a given string to determine if it is a URI or URI reference. + */ +final class Uri { + private String str; + private int index; + private boolean pctEncodedFound; + + Uri(String str) { + this.str = str; + } + + /** + * Determines whether string is a valid URI. + * + *

Parses the rule: + * + *

URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+   */
+  boolean uri() {
+    // Remember where parsing began so that every failure path can rewind. uriReference()
+    // calls relativeRef() right after a failed uri(), and relativeRef() starts at this.index.
+    int start = this.index;
+
+    if (!(this.scheme() && this.take(':') && this.hierPart())) {
+      this.index = start;
+      return false;
+    }
+
+    // A "?" must be followed by a valid query.
+    if (this.take('?') && !this.query()) {
+      this.index = start;
+      return false;
+    }
+
+    // A "#" must be followed by a valid fragment.
+    if (this.take('#') && !this.fragment()) {
+      this.index = start;
+      return false;
+    }
+
+    // The whole string must have been consumed.
+    if (this.index != this.str.length()) {
+      this.index = start;
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Determines whether the current position is a valid hier-part.
+   *
+   * 

Parses the rule: + * + *

hier-part = "//" authority path-abempty
+   *                / path-absolute
+   *                / path-rootless
+   *                / path-empty
+   */
+  private boolean hierPart() {
+    int start = this.index;
+
+    if (this.takeDoubleSlash() && this.authority() && this.pathAbempty()) {
+      return true;
+    }
+
+    this.index = start;
+
+    return this.pathAbsolute() || this.pathRootless() || this.pathEmpty();
+  }
+
+  /**
+   * Determines whether string is a valid URI reference.
+   *
+   * 

Parses the rule: + * + *

URI-reference = URI / relative-ref
+   */
+  boolean uriReference() {
+    // Try the full URI form first and fall back to a relative reference only if that fails.
+    if (this.uri()) {
+      return true;
+    }
+    return this.relativeRef();
+  }
+
+  /**
+   * Determines whether the current position is a valid relative reference.
+   *
+   * 

Parses the rule: + * + *

relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+   */
+  private boolean relativeRef() {
+    int start = this.index;
+
+    if (!this.relativePart()) {
+      return false;
+    }
+
+    if (this.take('?') && !this.query()) {
+      this.index = start;
+      return false;
+    }
+
+    if (this.take('#') && !this.fragment()) {
+      this.index = start;
+      return false;
+    }
+
+    if (this.index != this.str.length()) {
+      this.index = start;
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Determines whether the current position is a valid relative part.
+   *
+   * 

Parses the rule: + * + *

relative-part = "//" authority path-abempty
+   *                    / path-absolute
+   *                    / path-noscheme
+   *                    / path-empty
+   */
+  private boolean relativePart() {
+    int start = this.index;
+
+    if (this.takeDoubleSlash() && this.authority() && this.pathAbempty()) {
+      return true;
+    }
+
+    this.index = start;
+
+    return this.pathAbsolute() || this.pathNoscheme() || this.pathEmpty();
+  }
+
+  private boolean takeDoubleSlash() {
+    // Matches "//" as two consecutive take('/') calls; the second is only attempted when the
+    // first succeeded. hierPart() and relativePart() rewind this.index themselves when this
+    // returns false.
+    return take('/') && take('/');
+  }
+
+  /**
+   * Determines whether the current position is a valid scheme.
+   *
+   * 

Parses the rule: + * + *

scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+   */
+  private boolean scheme() {
+    int start = this.index;
+
+    if (this.alpha()) {
+      while (this.alpha() || this.digit() || this.take('+') || this.take('-') || this.take('.')) {}
+
+      // A scheme only counts when it is immediately followed by ":" (the caller consumes it).
+      // The bounds check matters: input made up solely of scheme characters (e.g. "foo")
+      // leaves index == length, and an unguarded charAt would throw
+      // StringIndexOutOfBoundsException instead of reporting a non-match.
+      if (this.index < this.str.length() && this.str.charAt(this.index) == ':') {
+        return true;
+      }
+    }
+
+    this.index = start;
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is a valid authority.
+   *
+   * 

Parses the rule: + * + *

authority = [ userinfo "@" ] host [ ":" port ]
+   *
+   * Preceded by a double slash ("//") and terminated by "/", "?", "#", or end of URI.
+   */
+  private boolean authority() {
+    int start = this.index;
+
+    if (this.userinfo()) {
+      if (!this.take('@')) {
+        this.index = start;
+        return false;
+      }
+    }
+
+    if (!this.host()) {
+      this.index = start;
+      return false;
+    }
+
+    if (this.take(':')) {
+      if (!this.port()) {
+        this.index = start;
+        return false;
+      }
+    }
+
+    if (!this.isAuthorityEnd()) {
+      this.index = start;
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Determines whether the current position is the end of the authority.
+   *
+   * 

The authority component [...] is terminated by one of the following: + * + *

    + *
  • the next slash ("/") + *
  • question mark ("?") + *
  • number sign ("#") character + *
  • the end of the URI. + *
+   */
+  private boolean isAuthorityEnd() {
+    // Running out of input also terminates the authority.
+    if (this.index >= this.str.length()) {
+      return true;
+    }
+
+    switch (this.str.charAt(this.index)) {
+      case '?':
+      case '#':
+      case '/':
+        return true;
+      default:
+        return false;
+    }
+  }
+
+  /**
+   * Determines whether the current position is a valid userinfo.
+   *
+   *

Parses the rule: + * + *

userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+   *
+   * Terminated by "@" in authority.
+   */
+  private boolean userinfo() {
+    int start = this.index;
+
+    while (true) {
+      if (this.unreserved() || this.pctEncoded() || this.subDelims() || this.take(':')) {
+        continue;
+      }
+
+      if (this.index < this.str.length()) {
+        if (this.str.charAt(this.index) == '@') {
+          return true;
+        }
+      }
+
+      this.index = start;
+
+      return false;
+    }
+  }
+
+  private static int unhex(char c) {
+    // Maps a single ASCII hex digit to its numeric value; every other character maps to 0.
+    int value = 0;
+
+    if (c >= '0' && c <= '9') {
+      value = c - '0';
+    } else if (c >= 'a' && c <= 'f') {
+      value = 10 + (c - 'a');
+    } else if (c >= 'A' && c <= 'F') {
+      value = 10 + (c - 'A');
+    }
+
+    return value;
+  }
+
+  /**
+   * Verifies that str is correctly percent-encoded.
+   *
+   * 

Note that we essentially want to mimic the behavior of decodeURIComponent, which would fail + * on malformed URLs. Java does have various methods for decoding URLs, but none behave + * consistently with decodeURIComponent. + * + *

The code below is a combination of `checkHostPctEncoded` from the protovalidate-go + * implementation and Java's java.net.URI#decode methods. + */ + private boolean checkHostPctEncoded(String str) { + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder(); + + int strLen = str.length(); + ByteBuffer buffer = ByteBuffer.allocate(strLen); + CharBuffer out = CharBuffer.allocate(strLen); + + // Unhex str and convert to a ByteBuffer. + for (int i = 0; i < str.length(); ) { + if (str.charAt(i) == '%') { + // If we encounter a %, unhex the two following digits, extract their + // last 4 bits, cast to a byte. + byte b = + (byte) + (((unhex(str.charAt(i + 1)) & 0xf) << 4) | ((unhex(str.charAt(i + 2)) & 0xf) << 0)); + buffer.put(b); + i += 3; + } else { + // Not percent encoded, extract the last 4 bits, convert to a byte + // and add to the byte buffer. + buffer.put((byte) (str.charAt(i) & 0xf)); + i++; + } + } + + // Attempt to decode the byte buffer as UTF-8. + CoderResult f = decoder.decode((ByteBuffer) buffer.flip(), out, true); + + // If an error occurred, return false as invalid. + if (f.isError()) { + return false; + } + // Flush the buffer + f = decoder.flush(out); + + // If an error occurred, return false as invalid. + // Otherwise return true. + return !f.isError(); + } + + /** + * Determines whether the current position is a valid host. + * + *

Parses the rule: + * + *

host = IP-literal / IPv4address / reg-name
+   */
+  private boolean host() {
+    if (this.index >= this.str.length()) {
+      return true;
+    }
+
+    int start = this.index;
+    this.pctEncodedFound = false;
+
+    // Note: IPv4address is a subset of reg-name
+    if ((this.str.charAt(this.index) == '[' && this.ipLiteral()) || this.regName()) {
+      if (this.pctEncodedFound) {
+        String rawHost = this.str.substring(start, this.index);
+        // RFC 3986:
+        // > URI producing applications must not use percent-encoding in host
+        // > unless it is used to represent a UTF-8 character sequence.
+        if (!this.checkHostPctEncoded(rawHost)) {
+          return false;
+        }
+      }
+
+      return true;
+    }
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is a valid port.
+   *
+   * 

Parses the rule: + * + *

port = *DIGIT
+   *
+   * Terminated by end of authority.
+   */
+  private boolean port() {
+    int start = this.index;
+
+    while (true) {
+      if (this.digit()) {
+        continue;
+      }
+
+      if (this.isAuthorityEnd()) {
+        return true;
+      }
+
+      this.index = start;
+
+      return false;
+    }
+  }
+
+  /**
+   * Determines whether the current position is a valid IP literal.
+   *
+   * 

Parses the rule from RFC 6874: + * + *

IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture  ) "]"
+   */
+  private boolean ipLiteral() {
+    int start = this.index;
+
+    if (this.take('[')) {
+      int j = this.index;
+
+      if (this.ipv6Address() && this.take(']')) {
+        return true;
+      }
+
+      this.index = j;
+
+      if (this.ipv6Addrz() && this.take(']')) {
+        return true;
+      }
+
+      this.index = j;
+
+      if (this.ipvFuture() && this.take(']')) {
+        return true;
+      }
+    }
+
+    this.index = start;
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is a valid ipv6 address.
+   *
+   * 

ipv6Address parses the rule "IPv6address". + * + *

Relies on the implementation of isIp. + */ + private boolean ipv6Address() { + int start = this.index; + + while (this.hexDig() || this.take(':')) {} + + if (CustomOverload.isIp(this.str.substring(start, this.index), 6)) { + return true; + } + + this.index = start; + + return false; + } + + /** + * Determines whether the current position is a valid IPv6addrz. + * + * Parses the rule: + * + *

IPv6addrz = IPv6address "%25" ZoneID
+   */
+  private boolean ipv6Addrz() {
+    int start = this.index;
+
+    if (this.ipv6Address() && this.take('%') && this.take('2') && this.take('5') && this.zoneID()) {
+      return true;
+    }
+
+    this.index = start;
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is a valid zone ID.
+   *
+   * Parses the rule:
+   *
+   * 
ZoneID = 1*( unreserved / pct-encoded )
+   */
+  private boolean zoneID() {
+    int start = this.index;
+
+    while (this.unreserved() || this.pctEncoded()) {}
+
+    if (this.index - start > 0) {
+      return true;
+    }
+
+    this.index = start;
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is a valid IPvFuture.
+   *
+   * Parses the rule:
+   *
+   * 
IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
+   */
+  private boolean ipvFuture() {
+    int start = this.index;
+
+    if (this.take('v') && this.hexDig()) {
+      while (this.hexDig()) {}
+
+      if (this.take('.')) {
+        int j = 0;
+
+        while (this.unreserved() || this.subDelims() || this.take(':')) {
+          j++;
+        }
+
+        if (j >= 1) {
+          return true;
+        }
+      }
+    }
+
+    this.index = start;
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is a valid reg-name.
+   *
+   * Parses the rule:
+   *
+   * 
reg-name = *( unreserved / pct-encoded / sub-delims )
+   *
+   * Terminates on start of port (":") or end of authority.
+   */
+  private boolean regName() {
+    int start = this.index;
+
+    while (true) {
+      if (this.unreserved() || this.pctEncoded() || this.subDelims()) {
+        continue;
+      }
+
+      if (this.isAuthorityEnd()) {
+        // End of authority
+        return true;
+      }
+
+      if (this.str.charAt(this.index) == ':') {
+        return true;
+      }
+
+      this.index = start;
+
+      return false;
+    }
+  }
+
+  /**
+   * Determines whether the current position is the end of the path.
+   *
+   * 

The path is terminated by one of the following: + * + *

    + *
  • the first question mark ("?") + *
  • number sign ("#") character + *
  • the end of the URI. + *
+ */ + private boolean isPathEnd() { + if (this.index >= this.str.length()) { + return true; + } + + char c = this.str.charAt(this.index); + + return (c == '?' || c == '#'); + } + + /** + * Determines whether the current position is a valid path-abempty. + * + * Parses the rule: + * + *
path-abempty = *( "/" segment )
+   *
+   * Terminated by end of path: "?", "#", or end of URI.
+   */
+  private boolean pathAbempty() {
+    int start = this.index;
+
+    while (this.take('/') && this.segment()) {}
+
+    if (this.isPathEnd()) {
+      return true;
+    }
+
+    this.index = start;
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is a valid path-absolute.
+   *
+   * Parses the rule:
+   *
+   * 
path-absolute = "/" [ segment-nz *( "/" segment ) ]
+   *
+   * Terminated by end of path: "?", "#", or end of URI.
+   */
+  private boolean pathAbsolute() {
+    int start = this.index;
+
+    if (this.take('/')) {
+      if (this.segmentNz()) {
+        while (this.take('/') && this.segment()) {}
+      }
+
+      if (this.isPathEnd()) {
+        return true;
+      }
+    }
+
+    this.index = start;
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is a valid path-noscheme.
+   *
+   * Parses the rule:
+   *
+   * 
path-noscheme = segment-nz-nc *( "/" segment )
+   *
+   * Terminated by end of path: "?", "#", or end of URI.
+   */
+  private boolean pathNoscheme() {
+    int start = this.index;
+
+    if (this.segmentNzNc()) {
+      while (this.take('/') && this.segment()) {}
+
+      if (this.isPathEnd()) {
+        return true;
+      }
+    }
+
+    this.index = start;
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is a valid path-rootless.
+   *
+   * Parses the rule:
+   *
+   * 
path-rootless = segment-nz *( "/" segment )
+   *
+   * Terminated by end of path: "?", "#", or end of URI.
+   */
+  private boolean pathRootless() {
+    // Remember the entry position so that a failed match consumes nothing.
+    final int mark = this.index;
+
+    // The first segment must be non-empty (colons are allowed here).
+    if (!this.segmentNz()) {
+      this.index = mark;
+      return false;
+    }
+
+    // Followed by any number of "/" segment pairs.
+    boolean more = true;
+    while (more) {
+      more = this.take('/') && this.segment();
+    }
+
+    final boolean terminated = this.isPathEnd();
+    if (!terminated) {
+      this.index = mark;
+    }
+
+    return terminated;
+  }
+
+  /**
+   * Determines whether the current position is a valid path-empty.
+   *
+   * Parses the rule:
+   *
+   * <pre>path-empty = 0&lt;pchar&gt;</pre>
+   *
+   * Terminated by end of path: "?", "#", or end of URI.
+   */
+  private boolean pathEmpty() {
+    // An empty path matches exactly when the path terminator is already here.
+    final boolean atEnd = this.isPathEnd();
+    return atEnd;
+  }
+
+  /**
+   * Determines whether the current position is a valid segment.
+   *
+   * Parses the rule:
+   *
+   * <pre>segment = *pchar</pre>
+   */
+  private boolean segment() {
+    // Consume pchars until one fails to match; zero matches is still a valid segment.
+    boolean consumed;
+    do {
+      consumed = this.pchar();
+    } while (consumed);
+
+    return true;
+  }
+
+  /**
+   * Determines whether the current position is a valid segment-nz.
+   *
+   * Parses the rule:
+   *
+   * <pre>segment-nz = 1*pchar</pre>
+   */
+  private boolean segmentNz() {
+    final int mark = this.index;
+
+    // At least one pchar is required.
+    if (!this.pchar()) {
+      this.index = mark;
+      return false;
+    }
+
+    // Then take as many further pchars as are available.
+    boolean consumed;
+    do {
+      consumed = this.pchar();
+    } while (consumed);
+
+    return true;
+  }
+
+  /**
+   * Determines whether the current position is a valid segment-nz-nc.
+   *
+   * Parses the rule:
+   *
+   * <pre>segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+   *                   ; non-zero-length segment without any colon ":"</pre>
+   */
+  private boolean segmentNzNc() {
+    final int mark = this.index;
+
+    // Same alternatives as pchar, minus ":".
+    boolean matched;
+    do {
+      matched =
+          this.unreserved() || this.pctEncoded() || this.subDelims() || this.take('@');
+    } while (matched);
+
+    // Nothing consumed means the "non-zero-length" requirement was not met.
+    if (this.index <= mark) {
+      this.index = mark;
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * Determines whether the current position is a valid pchar.
+   *
+   * Parses the rule:
+   *
+   * <pre>pchar = unreserved / pct-encoded / sub-delims / ":" / "@"</pre>
+   */
+  private boolean pchar() {
+    // Try the character classes first, in grammar order.
+    if (this.unreserved() || this.pctEncoded() || this.subDelims()) {
+      return true;
+    }
+
+    // Then the two literal characters that pchar additionally allows.
+    return this.take(':') || this.take('@');
+  }
+
+  /**
+   * Determines whether the current position is a valid query.
+   *
+   * Parses the rule:
+   *
+   * <pre>query = *( pchar / "/" / "?" )</pre>
+   *
+   * Terminated by "#" or end of URI.
+   */
+  private boolean query() {
+    final int mark = this.index;
+
+    // Consume every character the query production allows.
+    boolean consumed;
+    do {
+      consumed = this.pchar() || this.take('/') || this.take('?');
+    } while (consumed);
+
+    // A query is only valid if it runs up to a "#" or to the end of the input.
+    final boolean terminated =
+        this.index == this.str.length() || this.str.charAt(this.index) == '#';
+    if (!terminated) {
+      this.index = mark;
+    }
+
+    return terminated;
+  }
+
+  /**
+   * Determines whether the current position is a valid fragment.
+   *
+   * Parses the rule:
+   *
+   * <pre>fragment = *( pchar / "/" / "?" )</pre>
+   *
+   * Terminated by end of URI.
+   */
+  private boolean fragment() {
+    final int mark = this.index;
+
+    // Consume every character the fragment production allows.
+    boolean consumed;
+    do {
+      consumed = this.pchar() || this.take('/') || this.take('?');
+    } while (consumed);
+
+    // A fragment is the last component, so it must run to the end of the input.
+    final boolean terminated = this.index == this.str.length();
+    if (!terminated) {
+      this.index = mark;
+    }
+
+    return terminated;
+  }
+
+  /**
+   * Determines whether the current position is a valid pct-encoded.
+   *
+   * Parses the rule:
+   *
+   * <pre>pct-encoded = "%" HEXDIG HEXDIG</pre>
+   *
+   * Sets `pctEncodedFound` to true if a valid triplet was found.
+   */
+  private boolean pctEncoded() {
+    final int mark = this.index;
+
+    // A triplet is "%" followed by exactly two hex digits.
+    final boolean triplet = this.take('%') && this.hexDig() && this.hexDig();
+    if (!triplet) {
+      // A partial match (e.g. "%4") may have advanced the index; rewind it.
+      this.index = mark;
+      return false;
+    }
+
+    // Record that the input contains at least one percent-encoded triplet.
+    this.pctEncodedFound = true;
+    return true;
+  }
+
+  /**
+   * Determines whether the current position is an unreserved character.
+   *
+   * Parses the rule:
+   *
+   * <pre>unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"</pre>
+   */
+  private boolean unreserved() {
+    // Letters and digits first.
+    if (this.alpha() || this.digit()) {
+      return true;
+    }
+
+    // Then the four unreserved punctuation marks.
+    return this.take('-') || this.take('_') || this.take('.') || this.take('~');
+  }
+
+  /**
+   * Determines whether the current position is a sub-delim.
+   *
+   * Parses the rule:
+   *
+   * <pre>sub-delims  = "!" / "$" / "&amp;" / "'" / "(" / ")"
+   *                  / "*" / "+" / "," / ";" / "="</pre>
+   */
+  private boolean subDelims() {
+    // The sub-delims set, tried in the order the grammar lists them.
+    final String candidates = "!$&'()*+,;=";
+
+    for (int i = 0; i < candidates.length(); i++) {
+      if (this.take(candidates.charAt(i))) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is an alpha character.
+   *
+   * Parses the rule:
+   *
+   * <pre>ALPHA =  %x41-5A / %x61-7A ; A-Z / a-z</pre>
+   */
+  private boolean alpha() {
+    if (this.index < this.str.length()) {
+      final char ch = this.str.charAt(this.index);
+      final boolean upper = ch >= 'A' && ch <= 'Z';
+      final boolean lower = ch >= 'a' && ch <= 'z';
+
+      // Only ASCII letters count; consume the character on a match.
+      if (upper || lower) {
+        this.index++;
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is a hex digit.
+   *
+   * <p>Parses the rule:
+   *
+   * <pre>HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"</pre>
+   */
+  private boolean hexDig() {
+    if (this.index < this.str.length()) {
+      final char ch = this.str.charAt(this.index);
+      final boolean decimal = ch >= '0' && ch <= '9';
+      // Both lower-case and upper-case hex letters are accepted.
+      final boolean lowerHex = ch >= 'a' && ch <= 'f';
+      final boolean upperHex = ch >= 'A' && ch <= 'F';
+
+      if (decimal || lowerHex || upperHex) {
+        this.index++;
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  /**
+   * Determines whether the current position is a digit.
+   *
+   * <p>Parses the rule:
+   *
+   * <pre>DIGIT = %x30-39 ; 0-9</pre>
+   */
+  private boolean digit() {
+    if (this.index < this.str.length()) {
+      final char ch = this.str.charAt(this.index);
+
+      // Only ASCII 0-9 count; consume the character on a match.
+      if (ch >= '0' && ch <= '9') {
+        this.index++;
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  /**
+   * Take the given char at the current position, incrementing the index if necessary.
+   */
+  private boolean take(char c) {
+    // Match only when there is input left and the next character equals c.
+    final boolean matches =
+        this.index < this.str.length() && this.str.charAt(this.index) == c;
+
+    // Advance past the character on a match; otherwise leave the index untouched.
+    if (matches) {
+      this.index++;
+    }
+
+    return matches;
+  }
+}