From b9fff3700b8ab3f13c72d58d89c75f943e98ecec Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Tue, 25 Mar 2025 14:11:03 -0400 Subject: [PATCH 01/11] URI/URI ref passing --- protovalidate/internal/extra_func.py | 730 +++++++++++++++++++++- tests/conformance/nonconforming.yaml | 881 --------------------------- tests/extra_func_test.py | 24 + 3 files changed, 735 insertions(+), 900 deletions(-) create mode 100644 tests/extra_func_test.py diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index cf211c75..2625d9a0 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -13,6 +13,7 @@ # limitations under the License. import math +import sys import typing from email.utils import parseaddr from ipaddress import IPv4Address, IPv4Network, IPv6Address, IPv6Network, ip_address, ip_network @@ -164,28 +165,13 @@ def is_email(string: celtypes.Value) -> celpy.Result: def is_uri(string: celtypes.Value) -> celpy.Result: - url = urlparse.urlparse(str(string)) - # urlparse correctly reads the scheme from URNs but parses everything - # after (except the query string) as the path. - if url.scheme == "urn": - if not (url.path): - return celtypes.BoolType(False) - elif not all([url.scheme, url.netloc, url.path]): - return celtypes.BoolType(False) - - # If the query string contains percent-encoding, then try to decode it. - # unquote will return the same string if it is improperly encoded. - if "%" in url.query: - return celtypes.BoolType(urlparse.unquote(url.query) != url.query) - - return celtypes.BoolType(True) + valid = Uri(str(string)).uri() + return celtypes.BoolType(valid) def is_uri_ref(string: celtypes.Value) -> celpy.Result: - url = urlparse.urlparse(str(string)) - if not all([url.scheme, url.path]) and url.fragment: - return celtypes.BoolType(False) - return celtypes.BoolType(True) + valid = Uri(str(string)).uri_reference() + return celtypes.BoolType(valid) def is_hostname(string: celtypes.Value) -> celpy.Result: @@ -237,6 +223,712 @@ def unique(val: celtypes.Value) -> celpy.Result: return celtypes.BoolType(len(val) == len(set(val))) +class Uri: + _string: str + _index: int + _pct_encoded_found: bool + + def log(self, string: str): + print("index is {} -- {}".format(self._index, string), file=sys.stderr) + + def __init__(self, string: str): + super().__init__() + self._string = string + self._index = 0 + + def uri(self) -> bool: + """Determines whether string is a valid URI. + + Method parses the rule: + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + """ + start = self._index + if not (self.__scheme() and self.__take(":") and self.__hier_part()): + self._index = start + return False + + if self.__take("?") and not self.__query(): + return False + + if self.__take("#") and not self.__fragment(): + return False + + if self._index != len(self._string): + self._index = start + return False + + return True + + def uri_reference(self) -> bool: + """Determines whether string is a valid URI reference. + + Method parses the rule: + URI-reference = URI / relative-ref + """ + return self.uri() or self.__relative_ref() + + def __hier_part(self) -> bool: + """Determines whether string contains a valid hier-part. + + Method parses the rule: + + hier-part = "//" authority path-abempty. + / path-absolute + / path-rootless + / path-empty + """ + start = self._index + if self.__take("/") and self.__take("/") and self.__authority() and self.__path_abempty(): + return True + + self._index = start + + self.log("made it here, which is bad") + + return self.__path_absolute() or self.__path_rootless() or self.__path_empty() + + def __relative_ref(self) -> bool: + """Determines whether string contains a valid relative reference. + + Method parses the rule: + + relative-ref = relative-part [ "?" query ] [ "#" fragment ] + """ + start = self._index + if not self.__relative_part(): + return False + + if self.__take("?") and not self.__query(): + self._index = start + return False + + if self.__take("#") and not self.__fragment(): + self._index = start + return False + + if self._index != len(self._string): + self._index = start + return False + + return True + + def __relative_part(self) -> bool: + """Determines whether string contains a valid relative part. + + Method parses the rule: + + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + """ + + start = self._index + if self.__take("/") and self.__take("/") and self.__authority() and self.__path_abempty(): + return True + + self._index = start + + return self.__path_absolute() or self.__path_noscheme() or self.__path_empty() + + def __scheme(self) -> bool: + """Determines whether string contains a valid scheme. + + Method parses the rule: + + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + + Terminated by ":". + """ + + start = self._index + if self.__alpha(): + while self.__alpha() or self.__digit() or self.__take("+") or self.__take("-") or self.__take("."): + pass + + if self._string[self._index] == ":": + return True + + self._index = start + return False + + def __authority(self) -> bool: + """Determines whether string contains a valid authority. + + Method parses the rule: + + authority = [ userinfo "@" ] host [ ":" port ] + + Lead by double slash ("") and terminated by "/", "?", "#", or end of URI. + """ + + start = self._index + if self.__userinfo(): + if not self.__take("@"): + self._index = start + self.log("done with userinfo") + return False + + self.log("checking host") + if not self.__host(): + self._index = start + self.log("not a host") + return False + + if self.__take(":"): + if not self.__port(): + self._index = start + return False + + self.log("is auth end check") + if not self.__is_authority_end(): + self.log("not a auth end") + self._index = start + return False + + self.log("we passed") + return True + + def __is_authority_end(self) -> bool: + """Reports whether the current position is the end of the authority. + + The authority component [...] is terminated by the next slash ("/"), + question mark ("?"), or number sign ("#") character, or by the + end of the URI. + """ + + return ( + self._index >= len(self._string) + or self._string[self._index] == "?" + or self._string[self._index] == "#" + or self._string[self._index] == "/" + ) + + def __userinfo(self) -> bool: + """Determines whether string contains a valid userinfo. + + Method parses the rule: + + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + + Terminated by "@" in authority. + """ + start = self._index + while True: + if self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take(":"): + continue + + if self._index < len(self._string): + if self._string[self._index] == "@": + return True + + self._index = start + return False + + def __check_host_pct_encoded(self, string: str) -> bool: + """Verifies that string is correctly percent-encoded""" + try: + # unquote defaults to 'UTF-8' encoding. + urlparse.unquote(string, errors="strict") + except UnicodeError: + return False + + return True + + def __host(self) -> bool: + """Determines whether string contains a valid host. + + host parses the rule: + + host = IP-literal / IPv4address / reg-name. + """ + if self._index >= len(self._string): + return False + + start = self._index + self._pct_encoded_found = False + + # Note: IPv4address is a subset of reg-name + if (self._string[self._index] == "[" and self.__ip_literal()) or self.__reg_name(): + if self._pct_encoded_found: + raw_host = self._string[start : self._index] + # RFC 3986: + # > URI producing applications must not use percent-encoding in host + # > unless it is used to represent a UTF-8 character sequence. + if not self.__check_host_pct_encoded(raw_host): + return False + + return True + + return False + + def __port(self) -> bool: + """Determines whether string contains a valid port. + + host parses the rule: + + port = *DIGIT + + Terminated by end of authority. + """ + start = self._index + while True: + if self.__digit(): + continue + + if self.__is_authority_end(): + return True + + self._index = start + return False + + def __ip_literal(self) -> bool: + """Determines whether string contains a valid port. + + ip_literal parses the rule from RFC 6874: + + IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture ) "]" + """ + + start = self._index + + if self.__take("["): + curr_idx = self._index + if self.__ipv6_address() and self.__take("]"): + return True + + self._index = curr_idx + + if self.__ipv6_addrz() and self.__take("]"): + return True + + self._index = curr_idx + + if self.__ip_vfuture() and self.__take("]"): + return True + + self._index = start + return False + + def __ipv6_address(self) -> bool: + """Determines whether string contains a valid ipv6 address. + + Method parses the rule "IPv6address". + + Relies on the implementation of is_ip. + """ + start = self._index + while self.__hex_dig() or self.__take(":"): + pass + + if validate_ip(self._string[start : self._index], 6): + return True + + self._index = start + return False + + def __ipv6_addrz(self) -> bool: + """Determines whether string contains a valid IPv6addrz. + + RFC 6874: + + IPv6addrz = IPv6address "%25" ZoneID + """ + start = self._index + if self.__ipv6_address() and self.__take("%") and self.__take("2") and self.__take("5") and self.__zone_id(): + return True + + self._index = start + + return False + + def __zone_id(self) -> bool: + """Determines whether string contains a valid zone ID. + + RFC 6874: + + ZoneID = 1*( unreserved / pct-encoded ) + """ + + start = self._index + while self.__unreserved() or self.__pct_encoded(): + pass + + if self._index - start > 0: + return True + + self._index = start + + return False + + def __ip_vfuture(self) -> bool: + """Determines whether string contains a valid ipvFuture. + + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + """ + start = self._index + + if self.__take("v") and self.__hex_dig(): + while self.__hex_dig(): + pass + + if self.__take("."): + j = 0 + while self.__unreserved() or self.__sub_delims() or self.__take(":"): + j += 1 + + if j >= 1: + return True + + self._index = start + + return False + + def __reg_name(self) -> bool: + """Determines whether string contains a valid reg-name. + + reg-name = *( unreserved / pct-encoded / sub-delims ) + + Terminates on start of port (":") or end of authority. + """ + start = self._index + while True: + if self.__unreserved() or self.__pct_encoded() or self.__sub_delims(): + continue + + if self.__is_authority_end(): + # End of authority + return True + + if self._string[self._index] == ":": + return True + + self._index = start + + return False + + def __is_path_end(self) -> bool: + """Determines whether the current index has reached the end of path. + + > The path is terminated by the first question mark ("?") or + > number sign ("#") character, or by the end of the URI. + """ + return self._index >= len(self._string) or self._string[self._index] == "?" or self._string[self._index] == "#" + + def __path_abempty(self) -> bool: + """Determines whether string contains a path-abempty. + + path-abempty = *( "/" segment ) + + Terminated by end of path: "?", "#", or end of URI. + """ + start = self._index + while self.__take("/") and self.__segment(): + pass + + self.log("done with segment loop") + if self.__is_path_end(): + return True + + self._index = start + + return False + + def __path_absolute(self) -> bool: + """Determines whether string contains a path-absolute. + + path-absolute = "/" [ segment-nz *( "/" segment ) ] + + Terminated by end of path: "?", "#", or end of URI. + """ + start = self._index + + if self.__take("/"): + if self.__segment_nz(): + while self.__take("/") and self.__segment(): + pass + + if self.__is_path_end(): + return True + + self._index = start + + return False + + def __path_noscheme(self) -> bool: + """Determines whether string contains a path-noscheme. + + path-noscheme = segment-nz-nc *( "/" segment ) + + Terminated by end of path: "?", "#", or end of URI. + """ + + start = self._index + if self.__segment_nz_nc(): + while self.__take("/") and self.__segment(): + pass + + if self.__is_path_end(): + return True + + self._index = start + + return True + + def __path_rootless(self) -> bool: + """Determines whether string contains a path-rootless. + + path-rootless = segment-nz *( "/" segment ) + + Terminated by end of path: "?", "#", or end of URI. + """ + start = self._index + + if self.__segment_nz(): + while self.__take("/") and self.__segment(): + pass + + if self.__is_path_end(): + return True + + self._index = start + + return True + + def __path_empty(self) -> bool: + """Determines whether string contains a path-empty. + + path-empty = 0 + + Terminated by end of path: "?", "#", or end of URI. + """ + return self.__is_path_end() + + def __segment(self) -> bool: + """Determines whether string contains a segment. + + segment = *pchar + """ + + while self.__pchar(): + pass + + return True + + def __segment_nz(self) -> bool: + """Determines whether string contains a segment-nz. + + segment-nz = 1*pchar + """ + start = self._index + + if self.__pchar(): + while self.__pchar(): + pass + + return True + + self._index = start + + return False + + def __segment_nz_nc(self) -> bool: + """Determines whether string contains a segment-nz-nc. + + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + """ + + start = self._index + + while self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take("@"): + pass + + if self._index - start > 0: + return True + + self._index = start + + return False + + def __pchar(self) -> bool: + """Determines whether the character at the current index is a pchar. + + pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + """ + return ( + self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take(":") or self.__take("@") + ) + + def __query(self) -> bool: + """Determines whether string contains a valid query. + + query = *( pchar / "/" / "?" ) + + Terminated by "#" or end of URI. + """ + + start = self._index + + while True: + if self.__pchar() or self.__take("/") or self.__take("?"): + continue + + if self._index == len(self._string) or self._string[self._index] == "#": + return True + + self._index = start + + return False + + def __fragment(self) -> bool: + """Determines whether string contains a valid fragment. + + fragment = *( pchar / "/" / "?" ) + + Terminated by end of URI. + """ + + start = self._index + + while True: + if self.__pchar() or self.__take("/") or self.__take("?"): + continue + + if self._index == len(self._string): + return True + + self._index = start + + return False + + def __pct_encoded(self) -> bool: + """Determines whether string contains a valid percent encoding. + + pct-encoded = "%" HEXDIG HEXDIG + + Sets `_pct_encoded_found` to true if a valid triplet was found + """ + start = self._index + + if self.__take("%") and self.__hex_dig() and self.__hex_dig(): + self._pct_encoded_found = True + return True + + self._index = start + + return False + + def __unreserved(self) -> bool: + """Determines whether the character at the current index is unreserved. + + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + """ + return ( + self.__alpha() + or self.__digit() + or self.__take("-") + or self.__take("_") + or self.__take(".") + or self.__take("~") + ) + + def __sub_delims(self) -> bool: + """Determines whether the character at the current index is a sub-delim. + + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + """ + return ( + self.__take("!") + or self.__take("$") + or self.__take("&") + or self.__take("'") + or self.__take("(") + or self.__take(")") + or self.__take("*") + or self.__take("+") + or self.__take(",") + or self.__take(";") + or self.__take("=") + ) + + def __alpha(self) -> bool: + """Determines whether the character at the current index is an alpha char. + + alpha parses the rule: + + ALPHA = %x41-5A / %x61-7A ; A-Z / a-z + """ + + if self._index >= len(self._string): + return False + + c = self._string[self._index] + if ("A" <= c <= "Z") or ("a" <= c <= "z"): + self._index += 1 + return True + + return False + + def __digit(self) -> bool: + """Determines whether the character at the current index is a digit. + + Method parses the rule: + + DIGIT = %x30-39 ; 0-9 + """ + + if self._index >= len(self._string): + return False + + c = self._string[self._index] + if "0" <= c <= "9": + self._index += 1 + return True + + return False + + def __hex_dig(self) -> bool: + """Determines whether the character at the current index is a hex digit. + + Method parses the rule: + + HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" + """ + + if self._index >= len(self._string): + return False + + c = self._string[self._index] + + if ("0" <= c <= "9") or ("a" <= c <= "f") or ("A" <= c <= "F") or ("0" <= c <= "9"): + self._index += 1 + + return True + + return False + + def __take(self, char: str) -> bool: + """Take the given char at the current index. + + If char is at the current index, increment the index. + + Returns: + True if char is at the current index. False if char is not at the + current index or the end of string has been reached. + """ + + if self._index >= len(self._string): + return False + + if self._string[self._index] == char: + self._index += 1 + return True + + return False + + def make_extra_funcs(locale: str) -> dict[str, celpy.CELFunction]: # TODO(#257): Fix types and add tests for StringFormat. # For now, ignoring the type. diff --git a/tests/conformance/nonconforming.yaml b/tests/conformance/nonconforming.yaml index 531cf553..47767dd6 100644 --- a/tests/conformance/nonconforming.yaml +++ b/tests/conformance/nonconforming.yaml @@ -166,887 +166,6 @@ library/is_ip_prefix: # want: validation error (1 violation) # 1. constraint_id: "library.is_ip_prefix" # got: valid -library/is_uri: - - invalid/authority_path-abempty_segment_bad_caret - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo://example.com/^"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: valid - - invalid/authority_path-abempty_segment_bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo://example.com/\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: valid - - invalid/authority_path-abempty_segment_bad_pct-encoded - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo://example.com/%x"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: valid - - invalid/host_ipfuture - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[v1x]"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: runtime error: ('return error for overflow', , ('IPvFuture address is invalid',)) - - invalid/host_ipv6/b - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[2001::0370::7334]"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: runtime error: ('return error for overflow', , ("'2001::0370::7334' does not appear to be an IPv4 or IPv6 address",)) - - invalid/host_ipv6_zone-id_bad_pct-encoded/a - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25foo%]"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: runtime error: ('return error for overflow', , ("'::1%25foo%' does not appear to be an IPv4 or IPv6 address",)) - - invalid/host_ipv6_zone-id_bad_pct-encoded/b - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25foo%2x]"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: runtime error: ('return error for overflow', , ("'::1%25foo%2x' does not appear to be an IPv4 or IPv6 address",)) - - invalid/host_ipv6_zone-id_pct-encoded_invalid_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25foo%c3x%96]"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: runtime error: ('return error for overflow', , ("'::1%25foo%c3x%96' does not appear to be an IPv4 or IPv6 address",)) - - invalid/userinfo_reserved_square_bracket_close - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://]@example.com"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: runtime error: ('return error for overflow', , ('Invalid IPv6 URL',)) - - invalid/userinfo_reserved_square_bracket_open - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[@example.com"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # got: runtime error: ('return error for overflow', , ('Invalid IPv6 URL',)) - - valid/authority_path-abempty - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo://example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/example - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/fragment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com?#frag"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/fragment_pchar_extra - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com#/?"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/fragment_pct-encoded_ascii - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com#%61%20%23"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/fragment_pct-encoded_invalid_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com#%c3x%96"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/fragment_pct-encoded_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com#%c3%96"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/fragment_sub-delims - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com#!$&'()*+,;="} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_ip4v_bad_octet - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://256.0.0.1"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_ipfuture_exhaust - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[vF.-!$&'()*+,;=._~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_ipfuture_long - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[v1234AF.x]"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_ipfuture_short - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[v1.x]"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_ipv4 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://127.0.0.1"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_ipv6 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_ipv6_zone-id - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25eth0]"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_ipv6_zone-id_pct-encoded_ascii - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25foo%61%20%23]"} - # want: valid - # got: runtime error: ('return error for overflow', , ("'::1%25foo%61%20%23' does not appear to be an IPv4 or IPv6 address",)) - - valid/host_ipv6_zone-id_pct-encoded_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://[::1%25foo%c3%96]"} - # want: valid - # got: runtime error: ('return error for overflow', , ("'::1%25foo%c3%96' does not appear to be an IPv4 or IPv6 address",)) - - valid/host_reg-name - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://foo"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_reg-name_empty - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://:8080"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_reg-name_exhaust - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://!$&'()*+,;=._~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_reg-name_pct-encoded_ascii - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://foo%61%20%23"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/host_reg-name_pct-encoded_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://foo%c3%96"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/nz"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_exhaust_segment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/nz/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ%20!$&'()*+,;=:@%20"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_exhaust_segment-nz - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/@%20!$&()*+,;=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~:"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_segment-nz-pct-encoded_ascii - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/%61%20%23"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_segment-nz-pct-encoded_invalid_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/%c3x%96"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_segment-nz-pct-encoded_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/%c3%96"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_segment_pct-encoded_ascii - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/nz/%61%20%23"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_segment_pct-encoded_invalid_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/nz/%c3x%96"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_segment_pct-encoded_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/nz/%c3%96%c3"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_with_empty_pchar - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/nz/"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_with_query_and_fragment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/nz?q#f"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_with_segment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/nz/a"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-absolute_with_segments - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:/nz//segment//segment/"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-empty - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-empty_with_query_and_fragment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:?q#f"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:nz"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_segment-nz_exhaust - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:@%20!$&()*+,;=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~:"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_segment-nz_pct-encoded_ascii - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:%61%20%23"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_segment-nz_pct-encoded_invalid_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:%c3x%96"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_segment-nz_pct-encoded_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:%c3%96"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_segment_empty_pchar - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:nz/"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_segment_exhaust - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:nz/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ%20!$&'()*+,;=:@%20"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_segment_pct-encoded_ascii - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:nz/%61%20%23"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_segment_pct-encoded_invalid_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:nz/%c3x%96"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_segment_pct-encoded_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:nz/%c3%96"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_with_query_and_fragment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:nz?q#f"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_with_segment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:nz/a"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/path-rootless_with_segments - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo:nz//segment//segment/"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/port_0 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com:0"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/port_1 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com:1"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/port_65535 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com:65535"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/port_65536 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com:65536"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/port_8080 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com:8080"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/port_empty - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com:"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/port_empty_reg-name_empty - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://:"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/query - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com?baz=quux"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/query_exhaust - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com?0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~%20!$&'()*+,=;:@?"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/query_extra - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com?/?"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/query_pchar_extra - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com?:@"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/query_pct-encoded_ascii - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com?%61%20%23"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/query_pct-encoded_invalid_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com?%c3x%96"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/query_pct-encoded_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com?%c3%96%c3"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/query_sub-delim_semicolon - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com?;"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/query_sub-delims - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com?!$&'()*+,="} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/query_unusual_key_value_structure - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://example.com?a=b&c&&=1&=="} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/scheme_exhaust - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"foo0123456789azAZ+-.://example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/scheme_ftp - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"ftp://example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_exhaust - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~!$&'()*+,;=::@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_extra - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://:@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_multiple_colons - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://:::@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_name - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://user@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_name_password - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://user:password@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_pct-encoded_ascii - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://%61%20%23@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_pct-encoded_invalid-utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://%c3x%963@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_pct-encoded_utf8 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://%c3%963@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_reserved_hash_parses_as_fragment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://#@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_reserved_questionmark_parses_as_query - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://?@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_reserved_slash_parses_as_path-abempty - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https:///@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_sub-delims - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://!$&'()*+,;=@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false - - valid/userinfo_unreserved - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUri]:{val:"https://0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~@example.com"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri" - # message: "" - # for_key: false -library/is_uri_ref: - - invalid/authority_path-abempty_segment_bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"//host/\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/bad_relative-part - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:":"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/leading_space - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:" ./foo"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-abempty_query_bad_caret - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"//host?^"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-abempty_query_bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"//host?\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-abempty_query_bad_pct-encoded - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"//host?%"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-abempty_segment_bad_pct-encoded - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"//host/%x"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-absolute_bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/foo/\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-absolute_query_bad_caret - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/?^"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-absolute_query_bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/?\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-absolute_query_bad_pct-encoded - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/?%"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-absolute_segment-nz_bad_caret - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/^"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-absolute_segment-nz_bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-absolute_segment-nz_bad_pct-encoded - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/%x"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-absolute_segment_bad_caret - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/nz/^"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-absolute_segment_bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/nz/\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-absolute_segment_bad_pct-encoded - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/nz/%x"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-empty_query_bad_caret - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"?^"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-empty_query_bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"?\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-empty_query_bad_pct-encoded - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"?%"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-noscheme_bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"./foo/\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-noscheme_query_bad_caret - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:".?^"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-noscheme_query_bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:".?\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-noscheme_query_bad_pct-encoded - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:".?%"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-noscheme_segment-bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"./\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-noscheme_segment-nz_bad_caret - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"^"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-noscheme_segment-nz_bad_colon - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:":"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-noscheme_segment-nz_bad_control_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"\x1f"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-noscheme_segment-nz_bad_pct-encoded - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"%x"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-noscheme_segment_bad_caret - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"./^"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/path-noscheme_segment_bad_pct-encoded - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"./%x"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/space - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:" "} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/trailing_space - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"./foo "} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - invalid/uri_with_bad_scheme - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"1foo://example.com"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # got: valid - - valid/authority_path-abempty_with_segment_query_fragment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"//host/foo?baz=quux#frag"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/extreme - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"//userinfo0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~!$&'()*+,;=::@host!$&'()*+,;=._~0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ:0123456789/path0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ%20!$&'()*+,;=:@%20//foo/?query0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~%20!$&'()*+,=;:@?#fragment0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~%20!$&'()*+,=;:@?/"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/path-abempty_exhaust_fragment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"//host#0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~%20!$&'()*+,=;:@?/"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/path-abempty_with_fragment/a - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"//host#frag"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/path-abempty_with_fragment/b - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"//host/foo/bar#frag"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/path-absolute_exhaust_fragment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/#0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~%20!$&'()*+,=;:@?/"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/path-absolute_with_fragment/a - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/#frag"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/path-absolute_with_fragment/b - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"/foo/bar#frag"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/path-empty_exhaust_fragment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"#0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~%20!$&'()*+,=;:@?/"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/path-noscheme_exhaust_fragment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:".#0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~%20!$&'()*+,=;:@?/"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/path-noscheme_with_fragment/a - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:".#frag"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/path-noscheme_with_fragment/b - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"./foo/bar#frag"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false - - valid/path-noscheme_with_segment_query_fragment - # input: [type.googleapis.com/buf.validate.conformance.cases.IsUriRef]:{val:"./foo/bar?baz=quux#frag"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_uri_ref" - # message: "" - # for_key: false standard_constraints/required: - proto2/scalar/optional/unset # input: [type.googleapis.com/buf.validate.conformance.cases.RequiredProto2ScalarOptional]:{} diff --git a/tests/extra_func_test.py b/tests/extra_func_test.py new file mode 100644 index 00000000..a10efb6e --- /dev/null +++ b/tests/extra_func_test.py @@ -0,0 +1,24 @@ +# Copyright 2023-2025 Buf Technologies, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from protovalidate.internal.extra_func import Uri + + +class TestFunc(unittest.TestCase): + def test_ninf(self): + uri = Uri("https://foo%c3x%96") + is_it = uri.uri() + self.assertFalse(is_it) From b7b07027754f94b31c28d982b479ba26cd0adaab Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Tue, 25 Mar 2025 14:28:17 -0400 Subject: [PATCH 02/11] Docs --- protovalidate/internal/extra_func.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index f566322d..5d8d6c98 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -221,6 +221,17 @@ def unique(val: celtypes.Value) -> celpy.Result: class Uri: + """Uri is a class used to validate a given string to determine if it is a valid URI or URI reference. + + Callers can validate a string by constructing an instance of this class and then calling one of its + public methods: + uri() + uri_reference() + + Each method will return True or False depending on whether it passes validation. + + """ + _string: str _index: int _pct_encoded_found: bool @@ -229,6 +240,11 @@ def log(self, string: str): print("index is {} -- {}".format(self._index, string), file=sys.stderr) def __init__(self, string: str): + """Initialize a URI validation class with a given string + + Args: + string (str): String to validate as a URI or URI reference. + """ super().__init__() self._string = string self._index = 0 @@ -366,10 +382,8 @@ def __authority(self) -> bool: self.log("done with userinfo") return False - self.log("checking host") if not self.__host(): self._index = start - self.log("not a host") return False if self.__take(":"): @@ -377,13 +391,10 @@ def __authority(self) -> bool: self._index = start return False - self.log("is auth end check") if not self.__is_authority_end(): - self.log("not a auth end") self._index = start return False - self.log("we passed") return True def __is_authority_end(self) -> bool: From f2b3cf9b405b111a41421391b919dec5a3837630 Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Tue, 25 Mar 2025 14:29:20 -0400 Subject: [PATCH 03/11] Cleanup --- protovalidate/internal/extra_func.py | 8 -------- tests/extra_func_test.py | 24 ------------------------ 2 files changed, 32 deletions(-) delete mode 100644 tests/extra_func_test.py diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index 5d8d6c98..c18400a0 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -14,7 +14,6 @@ import math import re -import sys import typing from ipaddress import IPv4Address, IPv4Network, IPv6Address, IPv6Network, ip_address, ip_network from urllib import parse as urlparse @@ -236,9 +235,6 @@ class Uri: _index: int _pct_encoded_found: bool - def log(self, string: str): - print("index is {} -- {}".format(self._index, string), file=sys.stderr) - def __init__(self, string: str): """Initialize a URI validation class with a given string @@ -296,8 +292,6 @@ def __hier_part(self) -> bool: self._index = start - self.log("made it here, which is bad") - return self.__path_absolute() or self.__path_rootless() or self.__path_empty() def __relative_ref(self) -> bool: @@ -379,7 +373,6 @@ def __authority(self) -> bool: if self.__userinfo(): if not self.__take("@"): self._index = start - self.log("done with userinfo") return False if not self.__host(): @@ -634,7 +627,6 @@ def __path_abempty(self) -> bool: while self.__take("/") and self.__segment(): pass - self.log("done with segment loop") if self.__is_path_end(): return True diff --git a/tests/extra_func_test.py b/tests/extra_func_test.py deleted file mode 100644 index a10efb6e..00000000 --- a/tests/extra_func_test.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2023-2025 Buf Technologies, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -from protovalidate.internal.extra_func import Uri - - -class TestFunc(unittest.TestCase): - def test_ninf(self): - uri = Uri("https://foo%c3x%96") - is_it = uri.uri() - self.assertFalse(is_it) From 3bc457ed96d862c038ccbc4c8db3f48921b2f4a7 Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Tue, 25 Mar 2025 14:37:47 -0400 Subject: [PATCH 04/11] Cleanup --- protovalidate/internal/extra_func.py | 45 ++++++++++++++++++++-------- 1 file changed, 33 insertions(+), 12 deletions(-) diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index c18400a0..2fe289bb 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -220,7 +220,7 @@ def unique(val: celtypes.Value) -> celpy.Result: class Uri: - """Uri is a class used to validate a given string to determine if it is a valid URI or URI reference. + """Uri is a class used to parse a given string to determine if it is a valid URI or URI reference. Callers can validate a string by constructing an instance of this class and then calling one of its public methods: @@ -228,7 +228,6 @@ class Uri: uri_reference() Each method will return True or False depending on whether it passes validation. - """ _string: str @@ -241,6 +240,7 @@ def __init__(self, string: str): Args: string (str): String to validate as a URI or URI reference. """ + super().__init__() self._string = string self._index = 0 @@ -251,6 +251,7 @@ def uri(self) -> bool: Method parses the rule: URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] """ + start = self._index if not (self.__scheme() and self.__take(":") and self.__hier_part()): self._index = start @@ -274,6 +275,7 @@ def uri_reference(self) -> bool: Method parses the rule: URI-reference = URI / relative-ref """ + return self.uri() or self.__relative_ref() def __hier_part(self) -> bool: @@ -286,6 +288,7 @@ def __hier_part(self) -> bool: / path-rootless / path-empty """ + start = self._index if self.__take("/") and self.__take("/") and self.__authority() and self.__path_abempty(): return True @@ -301,6 +304,7 @@ def __relative_ref(self) -> bool: relative-ref = relative-part [ "?" query ] [ "#" fragment ] """ + start = self._index if not self.__relative_part(): return False @@ -414,6 +418,7 @@ def __userinfo(self) -> bool: Terminated by "@" in authority. """ + start = self._index while True: if self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take(":"): @@ -443,6 +448,7 @@ def __host(self) -> bool: host = IP-literal / IPv4address / reg-name. """ + if self._index >= len(self._string): return False @@ -466,12 +472,13 @@ def __host(self) -> bool: def __port(self) -> bool: """Determines whether string contains a valid port. - host parses the rule: + Method parses the rule: port = *DIGIT Terminated by end of authority. """ + start = self._index while True: if self.__digit(): @@ -486,7 +493,7 @@ def __port(self) -> bool: def __ip_literal(self) -> bool: """Determines whether string contains a valid port. - ip_literal parses the rule from RFC 6874: + Method parses the rule from RFC 6874: IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture ) "]" """ @@ -516,8 +523,9 @@ def __ipv6_address(self) -> bool: Method parses the rule "IPv6address". - Relies on the implementation of is_ip. + Relies on the implementation of validate_ip. """ + start = self._index while self.__hex_dig() or self.__take(":"): pass @@ -535,6 +543,7 @@ def __ipv6_addrz(self) -> bool: IPv6addrz = IPv6address "%25" ZoneID """ + start = self._index if self.__ipv6_address() and self.__take("%") and self.__take("2") and self.__take("5") and self.__zone_id(): return True @@ -567,6 +576,7 @@ def __ip_vfuture(self) -> bool: IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) """ + start = self._index if self.__take("v") and self.__hex_dig(): @@ -592,6 +602,7 @@ def __reg_name(self) -> bool: Terminates on start of port (":") or end of authority. """ + start = self._index while True: if self.__unreserved() or self.__pct_encoded() or self.__sub_delims(): @@ -614,6 +625,7 @@ def __is_path_end(self) -> bool: > The path is terminated by the first question mark ("?") or > number sign ("#") character, or by the end of the URI. """ + return self._index >= len(self._string) or self._string[self._index] == "?" or self._string[self._index] == "#" def __path_abempty(self) -> bool: @@ -623,6 +635,7 @@ def __path_abempty(self) -> bool: Terminated by end of path: "?", "#", or end of URI. """ + start = self._index while self.__take("/") and self.__segment(): pass @@ -641,6 +654,7 @@ def __path_absolute(self) -> bool: Terminated by end of path: "?", "#", or end of URI. """ + start = self._index if self.__take("/"): @@ -682,6 +696,7 @@ def __path_rootless(self) -> bool: Terminated by end of path: "?", "#", or end of URI. """ + start = self._index if self.__segment_nz(): @@ -702,6 +717,7 @@ def __path_empty(self) -> bool: Terminated by end of path: "?", "#", or end of URI. """ + return self.__is_path_end() def __segment(self) -> bool: @@ -720,6 +736,7 @@ def __segment_nz(self) -> bool: segment-nz = 1*pchar """ + start = self._index if self.__pchar(): @@ -752,10 +769,11 @@ def __segment_nz_nc(self) -> bool: return False def __pchar(self) -> bool: - """Determines whether the character at the current index is a pchar. + """Reports whether the current position is a pchar. pchar = unreserved / pct-encoded / sub-delims / ":" / "@" """ + return ( self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take(":") or self.__take("@") ) @@ -809,6 +827,7 @@ def __pct_encoded(self) -> bool: Sets `_pct_encoded_found` to true if a valid triplet was found """ + start = self._index if self.__take("%") and self.__hex_dig() and self.__hex_dig(): @@ -820,10 +839,11 @@ def __pct_encoded(self) -> bool: return False def __unreserved(self) -> bool: - """Determines whether the character at the current index is unreserved. + """Reports whether the current position is an unreserved character. unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" """ + return ( self.__alpha() or self.__digit() @@ -834,11 +854,12 @@ def __unreserved(self) -> bool: ) def __sub_delims(self) -> bool: - """Determines whether the character at the current index is a sub-delim. + """Reports whether the current position is a sub-delim. sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" """ + return ( self.__take("!") or self.__take("$") @@ -854,9 +875,9 @@ def __sub_delims(self) -> bool: ) def __alpha(self) -> bool: - """Determines whether the character at the current index is an alpha char. + """Reports whether the current position is an alpha character. - alpha parses the rule: + Method parses the rule: ALPHA = %x41-5A / %x61-7A ; A-Z / a-z """ @@ -872,7 +893,7 @@ def __alpha(self) -> bool: return False def __digit(self) -> bool: - """Determines whether the character at the current index is a digit. + """Reports whether the current position is a digit. Method parses the rule: @@ -890,7 +911,7 @@ def __digit(self) -> bool: return False def __hex_dig(self) -> bool: - """Determines whether the character at the current index is a hex digit. + """Reports whether the current position is a hex digit. Method parses the rule: From 8707a6c65aef58d94e6559de4e6dc2fb529ea18f Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Tue, 25 Mar 2025 14:43:52 -0400 Subject: [PATCH 05/11] Cleanup --- protovalidate/internal/extra_func.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index 2fe289bb..6d25c6d9 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -161,11 +161,13 @@ def is_email(string: celtypes.Value) -> celpy.Result: def is_uri(string: celtypes.Value) -> celpy.Result: + """is_uri validates whether string is a valid URI.""" valid = Uri(str(string)).uri() return celtypes.BoolType(valid) def is_uri_ref(string: celtypes.Value) -> celpy.Result: + """is_uri_reference validates whether string is a valid URI reference.""" valid = Uri(str(string)).uri_reference() return celtypes.BoolType(valid) From 10cf420b8c7298a407733322f313a9291094c2ff Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Wed, 26 Mar 2025 10:20:23 -0400 Subject: [PATCH 06/11] Is Ip --- protovalidate/internal/extra_func.py | 304 +++++++++++++++++++++++---- tests/extra_func_test.py | 28 +++ 2 files changed, 296 insertions(+), 36 deletions(-) create mode 100644 tests/extra_func_test.py diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index 6d25c6d9..4c9baf73 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -15,7 +15,7 @@ import math import re import typing -from ipaddress import IPv4Address, IPv4Network, IPv6Address, IPv6Network, ip_address, ip_network +from ipaddress import IPv4Network, IPv6Network, ip_network from urllib import parse as urlparse import celpy @@ -63,17 +63,17 @@ def validate_host_and_port(string: str, *, port_required: bool) -> bool: end = string.find("]") after_end = end + 1 if after_end == len(string): # no port - return not port_required and validate_ip(string[1:end], 6) + return not port_required and _is_ip(string[1:end], 6) if after_end == split_idx: # port - return validate_ip(string[1:end]) and validate_port(string[split_idx + 1 :]) + return _is_ip(string[1:end]) and validate_port(string[split_idx + 1 :]) return False # malformed if split_idx == -1: - return not port_required and (_validate_hostname(string) or validate_ip(string, 4)) + return not port_required and (_validate_hostname(string) or _is_ip(string, 4)) host = string[:split_idx] port = string[split_idx + 1 :] - return (_validate_hostname(host) or validate_ip(host, 4)) and validate_port(port) + return (_validate_hostname(host) or _is_ip(host, 4)) and validate_port(port) def validate_port(val: str) -> bool: @@ -84,30 +84,25 @@ def validate_port(val: str) -> bool: return False -def validate_ip(val: typing.Union[str, bytes], version: typing.Optional[int] = None) -> bool: - try: - if version is None: - ip_address(val) - elif version == 4: - IPv4Address(val) - elif version == 6: - IPv6Address(val) - else: - msg = "invalid argument, expected 4 or 6" - raise celpy.CELEvalError(msg) - return True - except ValueError: - return False - - -def is_ip(val: celtypes.Value, version: typing.Optional[celtypes.Value] = None) -> celpy.Result: - if not isinstance(val, (celtypes.BytesType, celtypes.StringType)): - msg = "invalid argument, expected string or bytes" +def validate_ip(val: celtypes.Value, version: typing.Optional[celtypes.Value] = None) -> celpy.Result: + if not isinstance(val, celtypes.StringType): + msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) if not isinstance(version, celtypes.IntType) and version is not None: msg = "invalid argument, expected int" raise celpy.CELEvalError(msg) - return celtypes.BoolType(validate_ip(val, version)) + return celtypes.BoolType(_is_ip(val, version)) + + +def _is_ip(val: str, version: typing.Optional[int] = None) -> bool: + if version is None or version == 0: + return Ipv4(val).address() or Ipv6(val).address() + elif version == 4: + return Ipv4(val).address() + elif version == 6: + return Ipv6(val).address() + else: + return False def is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result: @@ -160,16 +155,28 @@ def is_email(string: celtypes.Value) -> celpy.Result: return celtypes.BoolType(m) -def is_uri(string: celtypes.Value) -> celpy.Result: +def validate_uri(string: celtypes.Value) -> celpy.Result: + if not isinstance(string, celtypes.StringType): + msg = "invalid argument, expected string" + raise celpy.CELEvalError(msg) + return celtypes.BoolType(_is_uri(string)) + + +def _is_uri(string: str) -> bool: """is_uri validates whether string is a valid URI.""" - valid = Uri(str(string)).uri() - return celtypes.BoolType(valid) + return Uri(str(string)).uri() + + +def validate_uri_ref(string: celtypes.Value) -> celpy.Result: + if not isinstance(string, celtypes.StringType): + msg = "invalid argument, expected string" + raise celpy.CELEvalError(msg) + return celtypes.BoolType(_is_uri_ref(string)) -def is_uri_ref(string: celtypes.Value) -> celpy.Result: +def _is_uri_ref(string: celtypes.Value) -> bool: """is_uri_reference validates whether string is a valid URI reference.""" - valid = Uri(str(string)).uri_reference() - return celtypes.BoolType(valid) + return Uri(str(string)).uri_reference() def is_hostname(string: celtypes.Value) -> celpy.Result: @@ -221,6 +228,231 @@ def unique(val: celtypes.Value) -> celpy.Result: return celtypes.BoolType(len(val) == len(set(val))) +class Ipv4: + """a class""" + + _string: str + _index: int + _octets: bytearray + _prefix_len: int + + def __init__(self, string: str): + """ipv4 + + Args: + """ + + super().__init__() + self._string = string + self._index = 0 + self._octets = bytearray() + self._prefix_len = 0 + + def address(self) -> bool: + """Parses an IPv4 Address in dotted decimal notation.""" + return self.__address_part() and self._index == len(self._string) + + def address_prefix(self) -> bool: + """Parses an IPv4 Address prefix.""" + return ( + self.__address_part() and self.__take("/") and self.__prefix_length() and self._index == len(self._string) + ) + + def __get_bits(self) -> int: + """Get the bits of an address parsed through address() or address_prefix() + + Returns: + The 32-bit value if address was parsed successfully. 0 if not successful. + """ + if len(self._octets) != 4: + return 0 + + return (self._octets[0] << 24) | (self._octets[1] << 16) | (self._octets[2] << 8) | self._octets[3] + + def __is_prefix_only(self) -> bool: + """Determines TODO + + Behavior is undefined if address_prefix() has not been called before or has returned false. + + Returns: + True if all bits to the right of the prefix-length are all zeros. False otherwise. + """ + bits = self.__get_bits() + + mask: int + if self._prefix_len == 32: + mask = 0xFFFFFFFF + else: + mask = ~(0xFFFFFFFF >> self._prefix_len) + + masked = bits & mask + + return bits == masked + + def __prefix_length(self) -> bool: + start = self._index + + while True: + if self._index >= len(self._string) or not self.__digit(): + break + + if self._index - start > 2: + # max prefix-length is 32 bits, so anything more than 2 digits is invalid + return False + + string = self._string[start : self._index] + if len(string) == 0: + # too short + return False + + if len(string) > 1 and string[0] == "0": + # bad leading 0 + return False + + try: + value = int(string) + + if value > 32: + # max 32 bits + return False + + self._prefix_len = value + + return True + + except ValueError: + # Error converting to number + return False + + def __address_part(self) -> bool: + start = self._index + + if ( + self.__dec_octet() + and self.__take(".") + and self.__dec_octet() + and self.__take(".") + and self.__dec_octet() + and self.__take(".") + and self.__dec_octet() + ): + return True + + self._index = start + + return False + + def __dec_octet(self) -> bool: + start = self._index + + while True: + if self._index >= len(self._string) or not self.__digit(): + break + + if self._index - start > 3: + # decimal octet can be three characters at most + return False + + string = self._string[start : self._index] + + if len(string) == 0: + # too short + return False + + if len(string) > 1 and string[0] == "0": + # bad leading 0 + return False + + try: + value = int(string) + + if value > 255: + return False + + self._octets.append(value) + + return True + + except ValueError: + # Error converting to number + return False + + def __digit(self) -> bool: + """Reports whether the current position is a digit. + + Method parses the rule: + + DIGIT = %x30-39 ; 0-9 + """ + + if self._index >= len(self._string): + return False + + c = self._string[self._index] + if "0" <= c <= "9": + self._index += 1 + return True + + return False + + def __take(self, char: str) -> bool: + """Take the given char at the current index. + + If char is at the current index, increment the index. + + Returns: + True if char is at the current index. False if char is not at the + current index or the end of string has been reached. + """ + + if self._index >= len(self._string): + return False + + if self._string[self._index] == char: + self._index += 1 + return True + + return False + + +class Ipv6: + """a class""" + + _string: str + _index: int + _pieces: list[int] + _double_colon_at: int + _double_colon_seen: bool + _dotted_raw: str + _dotted_addr: typing.Optional[Ipv4] + _zone_id_found: bool + _prefix_len: int + + def __init__(self, string: str): + """ipv6 + + Args: + + Attributes: + _string (str): The string to parse. + _index (int): The index. + _pieces (list[int]): 16-bit pieces found. + _double_colon_at (bool): Number of 16-bit pieces found when double colon was found. + _double_colon_seen (bool): Whether a double colon has been seen in string. + _dotted_raw (str): Dotted notation for right-most 32 bits. + _dotted_addr (typing.Optional[Ipv4]): Dotted notation successfully parsed as Ipv4. + _zone_id_found (bool): Whether a zone ID has been found in string. + _prefix_len (int): 0 - 128 + """ + + super().__init__() + self._string = string + self._double_colon_at = -1 + + def address(self) -> bool: + return True + + class Uri: """Uri is a class used to parse a given string to determine if it is a valid URI or URI reference. @@ -525,14 +757,14 @@ def __ipv6_address(self) -> bool: Method parses the rule "IPv6address". - Relies on the implementation of validate_ip. + Relies on the implementation of _is_ip. """ start = self._index while self.__hex_dig() or self.__take(":"): pass - if validate_ip(self._string[start : self._index], 6): + if _is_ip(self._string[start : self._index], 6): return True self._index = start @@ -962,11 +1194,11 @@ def make_extra_funcs(locale: str) -> dict[str, celpy.CELFunction]: # protovalidate specific functions "isNan": is_nan, "isInf": is_inf, - "isIp": is_ip, + "isIp": validate_ip, "isIpPrefix": is_ip_prefix, "isEmail": is_email, - "isUri": is_uri, - "isUriRef": is_uri_ref, + "isUri": validate_uri, + "isUriRef": validate_uri_ref, "isHostname": is_hostname, "isHostAndPort": is_host_and_port, "unique": unique, diff --git a/tests/extra_func_test.py b/tests/extra_func_test.py new file mode 100644 index 00000000..735ffeb9 --- /dev/null +++ b/tests/extra_func_test.py @@ -0,0 +1,28 @@ +# Copyright 2023-2025 Buf Technologies, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +from protovalidate.internal.extra_func import Ipv4, Uri + + +class TestFunc(unittest.TestCase): + def test_ninf(self): + uri = Uri("https://foo%c3x%96") + is_it = uri.uri() + self.assertFalse(is_it) + + def test_is_ip(self): + v4 = Ipv4("127.0.0.1").address() + self.assertTrue(v4) From b55fc7d92a1ab588c9f2fdca0a187283357a2ff3 Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Wed, 26 Mar 2025 10:54:47 -0400 Subject: [PATCH 07/11] Fix docs --- protovalidate/internal/extra_func.py | 162 ++++++++++++++++++++------- 1 file changed, 120 insertions(+), 42 deletions(-) diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index 6d25c6d9..09ba69e3 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -145,12 +145,21 @@ def is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result: def is_email(string: celtypes.Value) -> celpy.Result: - """Returns true if the string is an email address, for example "foo@example.com". + """Validate whether string is a valid email address. Conforms to the definition for a valid email address from the HTML standard. Note that this standard willfully deviates from RFC 5322, which allows many unexpected forms of email addresses and will easily match a typographical error. + + Args: + string (celTypes.Value): The string to validate. + + Returns: + True if the string is an email address, for example "foo@example.com". False otherwise. + + Raises: + celpy.CELEvalError: If string is not an instance of celtypes.StringType. """ if not isinstance(string, celtypes.StringType): @@ -161,13 +170,48 @@ def is_email(string: celtypes.Value) -> celpy.Result: def is_uri(string: celtypes.Value) -> celpy.Result: - """is_uri validates whether string is a valid URI.""" + """Validate whether string is a valid URI. + + URI is defined in the internet standard RFC 3986. + Zone Identifiers in IPv6 address literals are supported (RFC 6874). + + Args: + string (celTypes.Value): The string to validate. + + Returns: + True if the string is a URI, for example "https://example.com/foo/bar?baz=quux#frag". False otherwise. + + Raises: + celpy.CELEvalError: If string is not an instance of celtypes.StringType. + """ + + if not isinstance(string, celtypes.StringType): + msg = "invalid argument, expected string" + raise celpy.CELEvalError(msg) valid = Uri(str(string)).uri() return celtypes.BoolType(valid) def is_uri_ref(string: celtypes.Value) -> celpy.Result: - """is_uri_reference validates whether string is a valid URI reference.""" + """Validate whether string is a valid URI reference. + + URI, URI Reference, and Relative Reference are defined in the internet standard RFC 3986. + Zone Identifiers in IPv6 address literals are supported (RFC 6874). + + Args: + string (celTypes.Value): The string to validate. + + Returns: + True if the string is a URI Reference - a URI such as "https://example.com/foo/bar?baz=quux#frag" + or a Relative Reference such as "./foo/bar?query". False otherwise. + + Raises: + celpy.CELEvalError: If string is not an instance of celtypes.StringType. + """ + + if not isinstance(string, celtypes.StringType): + msg = "invalid argument, expected string" + raise celpy.CELEvalError(msg) valid = Uri(str(string)).uri_reference() return celtypes.BoolType(valid) @@ -248,9 +292,10 @@ def __init__(self, string: str): self._index = 0 def uri(self) -> bool: - """Determines whether string is a valid URI. + """Determine whether string is a valid URI. Method parses the rule: + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] """ @@ -272,16 +317,17 @@ def uri(self) -> bool: return True def uri_reference(self) -> bool: - """Determines whether string is a valid URI reference. + """Determine whether string is a valid URI reference. Method parses the rule: + URI-reference = URI / relative-ref """ return self.uri() or self.__relative_ref() def __hier_part(self) -> bool: - """Determines whether string contains a valid hier-part. + """Determine whether string contains a valid hier-part. Method parses the rule: @@ -300,7 +346,7 @@ def __hier_part(self) -> bool: return self.__path_absolute() or self.__path_rootless() or self.__path_empty() def __relative_ref(self) -> bool: - """Determines whether string contains a valid relative reference. + """Determine whether string contains a valid relative reference. Method parses the rule: @@ -326,7 +372,7 @@ def __relative_ref(self) -> bool: return True def __relative_part(self) -> bool: - """Determines whether string contains a valid relative part. + """Determine whether string contains a valid relative part. Method parses the rule: @@ -345,7 +391,7 @@ def __relative_part(self) -> bool: return self.__path_absolute() or self.__path_noscheme() or self.__path_empty() def __scheme(self) -> bool: - """Determines whether string contains a valid scheme. + """Determine whether string contains a valid scheme. Method parses the rule: @@ -366,7 +412,7 @@ def __scheme(self) -> bool: return False def __authority(self) -> bool: - """Determines whether string contains a valid authority. + """Determine whether string contains a valid authority. Method parses the rule: @@ -397,7 +443,7 @@ def __authority(self) -> bool: return True def __is_authority_end(self) -> bool: - """Reports whether the current position is the end of the authority. + """Report whether the current position is the end of the authority. The authority component [...] is terminated by the next slash ("/"), question mark ("?"), or number sign ("#") character, or by the @@ -412,7 +458,7 @@ def __is_authority_end(self) -> bool: ) def __userinfo(self) -> bool: - """Determines whether string contains a valid userinfo. + """Determine whether string contains a valid userinfo. Method parses the rule: @@ -434,7 +480,7 @@ def __userinfo(self) -> bool: return False def __check_host_pct_encoded(self, string: str) -> bool: - """Verifies that string is correctly percent-encoded""" + """Verify that string is correctly percent-encoded""" try: # unquote defaults to 'UTF-8' encoding. urlparse.unquote(string, errors="strict") @@ -444,9 +490,9 @@ def __check_host_pct_encoded(self, string: str) -> bool: return True def __host(self) -> bool: - """Determines whether string contains a valid host. + """Determine whether string contains a valid host. - host parses the rule: + Method parses the rule: host = IP-literal / IPv4address / reg-name. """ @@ -472,7 +518,7 @@ def __host(self) -> bool: return False def __port(self) -> bool: - """Determines whether string contains a valid port. + """Determine whether string contains a valid port. Method parses the rule: @@ -493,7 +539,7 @@ def __port(self) -> bool: return False def __ip_literal(self) -> bool: - """Determines whether string contains a valid port. + """Determine whether string contains a valid port. Method parses the rule from RFC 6874: @@ -521,7 +567,7 @@ def __ip_literal(self) -> bool: return False def __ipv6_address(self) -> bool: - """Determines whether string contains a valid ipv6 address. + """Determine whether string contains a valid ipv6 address. Method parses the rule "IPv6address". @@ -539,9 +585,9 @@ def __ipv6_address(self) -> bool: return False def __ipv6_addrz(self) -> bool: - """Determines whether string contains a valid IPv6addrz. + """Determine whether string contains a valid IPv6addrz. - RFC 6874: + Method parses the rule from RFC 6874: IPv6addrz = IPv6address "%25" ZoneID """ @@ -555,9 +601,9 @@ def __ipv6_addrz(self) -> bool: return False def __zone_id(self) -> bool: - """Determines whether string contains a valid zone ID. + """Determine whether string contains a valid zone ID. - RFC 6874: + Method parses the rule from RFC 6874: ZoneID = 1*( unreserved / pct-encoded ) """ @@ -574,7 +620,9 @@ def __zone_id(self) -> bool: return False def __ip_vfuture(self) -> bool: - """Determines whether string contains a valid ipvFuture. + """Determine whether string contains a valid ipvFuture. + + Method parses the rule: IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) """ @@ -598,7 +646,9 @@ def __ip_vfuture(self) -> bool: return False def __reg_name(self) -> bool: - """Determines whether string contains a valid reg-name. + """Determine whether string contains a valid reg-name. + + Method parses the rule: reg-name = *( unreserved / pct-encoded / sub-delims ) @@ -622,7 +672,7 @@ def __reg_name(self) -> bool: return False def __is_path_end(self) -> bool: - """Determines whether the current index has reached the end of path. + """Determine whether the current index has reached the end of path. > The path is terminated by the first question mark ("?") or > number sign ("#") character, or by the end of the URI. @@ -631,7 +681,9 @@ def __is_path_end(self) -> bool: return self._index >= len(self._string) or self._string[self._index] == "?" or self._string[self._index] == "#" def __path_abempty(self) -> bool: - """Determines whether string contains a path-abempty. + """Determine whether string contains a path-abempty. + + Method parses the rule: path-abempty = *( "/" segment ) @@ -650,7 +702,9 @@ def __path_abempty(self) -> bool: return False def __path_absolute(self) -> bool: - """Determines whether string contains a path-absolute. + """Determine whether string contains a path-absolute. + + Method parses the rule: path-absolute = "/" [ segment-nz *( "/" segment ) ] @@ -672,7 +726,9 @@ def __path_absolute(self) -> bool: return False def __path_noscheme(self) -> bool: - """Determines whether string contains a path-noscheme. + """Determine whether string contains a path-noscheme. + + Method parses the rule: path-noscheme = segment-nz-nc *( "/" segment ) @@ -692,7 +748,9 @@ def __path_noscheme(self) -> bool: return True def __path_rootless(self) -> bool: - """Determines whether string contains a path-rootless. + """Determine whether string contains a path-rootless. + + Method parses the rule: path-rootless = segment-nz *( "/" segment ) @@ -713,7 +771,9 @@ def __path_rootless(self) -> bool: return True def __path_empty(self) -> bool: - """Determines whether string contains a path-empty. + """Determine whether string contains a path-empty. + + Method parses the rule: path-empty = 0 @@ -723,7 +783,9 @@ def __path_empty(self) -> bool: return self.__is_path_end() def __segment(self) -> bool: - """Determines whether string contains a segment. + """Determine whether string contains a segment. + + Method parses the rule: segment = *pchar """ @@ -734,7 +796,9 @@ def __segment(self) -> bool: return True def __segment_nz(self) -> bool: - """Determines whether string contains a segment-nz. + """Determine whether string contains a segment-nz. + + Method parses the rule: segment-nz = 1*pchar """ @@ -752,7 +816,9 @@ def __segment_nz(self) -> bool: return False def __segment_nz_nc(self) -> bool: - """Determines whether string contains a segment-nz-nc. + """Determine whether string contains a segment-nz-nc. + + Method parses the rule: segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) ; non-zero-length segment without any colon ":" @@ -771,7 +837,9 @@ def __segment_nz_nc(self) -> bool: return False def __pchar(self) -> bool: - """Reports whether the current position is a pchar. + """Report whether the current position is a pchar. + + Method parses the rule: pchar = unreserved / pct-encoded / sub-delims / ":" / "@" """ @@ -781,7 +849,9 @@ def __pchar(self) -> bool: ) def __query(self) -> bool: - """Determines whether string contains a valid query. + """Determine whether string contains a valid query. + + Method parses the rule: query = *( pchar / "/" / "?" ) @@ -802,7 +872,9 @@ def __query(self) -> bool: return False def __fragment(self) -> bool: - """Determines whether string contains a valid fragment. + """Determine whether string contains a valid fragment. + + Method parses the rule: fragment = *( pchar / "/" / "?" ) @@ -823,7 +895,9 @@ def __fragment(self) -> bool: return False def __pct_encoded(self) -> bool: - """Determines whether string contains a valid percent encoding. + """Determine whether string contains a valid percent encoding. + + Method parses the rule: pct-encoded = "%" HEXDIG HEXDIG @@ -841,7 +915,9 @@ def __pct_encoded(self) -> bool: return False def __unreserved(self) -> bool: - """Reports whether the current position is an unreserved character. + """Report whether the current position is an unreserved character. + + Method parses the rule: unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" """ @@ -856,7 +932,9 @@ def __unreserved(self) -> bool: ) def __sub_delims(self) -> bool: - """Reports whether the current position is a sub-delim. + """Report whether the current position is a sub-delim. + + Method parses the rule: sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" @@ -877,7 +955,7 @@ def __sub_delims(self) -> bool: ) def __alpha(self) -> bool: - """Reports whether the current position is an alpha character. + """Report whether the current position is an alpha character. Method parses the rule: @@ -895,7 +973,7 @@ def __alpha(self) -> bool: return False def __digit(self) -> bool: - """Reports whether the current position is a digit. + """Report whether the current position is a digit. Method parses the rule: @@ -913,7 +991,7 @@ def __digit(self) -> bool: return False def __hex_dig(self) -> bool: - """Reports whether the current position is a hex digit. + """Report whether the current position is a hex digit. Method parses the rule: From c8f4284440e30590856745fed931b71a8442e36d Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Wed, 26 Mar 2025 14:45:52 -0400 Subject: [PATCH 08/11] IP --- protovalidate/internal/extra_func.py | 468 ++++++++++++++++++++++----- 1 file changed, 381 insertions(+), 87 deletions(-) diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index 1dead7e7..af8c52f3 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -15,7 +15,6 @@ import math import re import typing -from ipaddress import IPv4Network, IPv6Network, ip_network from urllib import parse as urlparse import celpy @@ -29,87 +28,62 @@ ) -def _validate_hostname(host): - if not host: - return False - if len(host) > 253: - return False - - if host[-1] == ".": - host = host[:-1] - - all_digits = True - for part in host.split("."): - if len(part) == 0 or len(part) > 63: - return False - - # Host names cannot begin or end with hyphens - if part[0] == "-" or part[-1] == "-": - return False - all_digits = True - for r in part: - if (r < "A" or r > "Z") and (r < "a" or r > "z") and (r < "0" or r > "9") and r != "-": - return False - all_digits = all_digits and "0" <= r <= "9" - return not all_digits - - -def validate_host_and_port(string: str, *, port_required: bool) -> bool: - if not string: - return False - - split_idx = string.rfind(":") - if string[0] == "[": - end = string.find("]") - after_end = end + 1 - if after_end == len(string): # no port - return not port_required and _is_ip(string[1:end], 6) - if after_end == split_idx: # port - return _is_ip(string[1:end]) and validate_port(string[split_idx + 1 :]) - return False # malformed +def is_ip(val: celtypes.Value, ver: typing.Optional[celtypes.Value] = None) -> celpy.Result: + """Validate whether a given string is a valid IP address according to an optional IP version. - if split_idx == -1: - return not port_required and (_validate_hostname(string) or _is_ip(string, 4)) + IPv4 addresses are expected in the dotted decimal format, for example "192.168.5.21". + IPv6 addresses are expected in their text representation, for example "::1" or "2001:0DB8:ABCD:0012::0". - host = string[:split_idx] - port = string[split_idx + 1 :] - return (_validate_hostname(host) or _is_ip(host, 4)) and validate_port(port) + Both formats are well-defined in the internet standard RFC 3986. Zone + identifiers for IPv6 addresses (for example "fe80::a%en1") are supported. + Args: + val (celTypes.Value): The string to validate. + version (typing.Optional[celtypes.Value]): An optional version to use for validating the IP address. + Passing None for a version of 0 means either 4 or 6. + Passing a version other than 0, 4, or 6 always returns False. -def validate_port(val: str) -> bool: - try: - port = int(val) - return port <= 65535 - except ValueError: - return False + Returns: + True if the string is an IPv4 or IPv6 address, optionally limited to a specific version. + Raises: + celpy.CELEvalError: If val is not an instance of celtypes.StringType or + if version is not an instance of celtypes.IntType and is not None. + """ -def validate_ip(val: celtypes.Value, version: typing.Optional[celtypes.Value] = None) -> celpy.Result: if not isinstance(val, celtypes.StringType): msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) - if not isinstance(version, celtypes.IntType) and version is not None: + if not isinstance(ver, celtypes.IntType) and ver is not None: msg = "invalid argument, expected int" raise celpy.CELEvalError(msg) + + if ver is None: + version = 0 + else: + version = ver + return celtypes.BoolType(_is_ip(val, version)) -def _is_ip(val: str, version: typing.Optional[int] = None) -> bool: - if version is None or version == 0: - return Ipv4(val).address() or Ipv6(val).address() +def _is_ip(string: str, version: int) -> bool: + """Internal implementation""" + valid = False + if version == 6: + valid = Ipv6(string).address() elif version == 4: - return Ipv4(val).address() - elif version == 6: - return Ipv6(val).address() - else: - return False + valid = Ipv4(string).address() + elif version == 0: + valid = Ipv4(string).address() or Ipv6(string).address() + + return valid def is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result: - if not isinstance(val, (celtypes.BytesType, celtypes.StringType)): + if not isinstance(val, celtypes.StringType): msg = "invalid argument, expected string or bytes" raise celpy.CELEvalError(msg) - version = None + version = 0 strict = celtypes.BoolType(False) if len(args) == 1 and isinstance(args[0], celtypes.BoolType): strict = args[0] @@ -124,19 +98,23 @@ def is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result: elif len(args) == 2 and (not isinstance(args[0], celtypes.IntType) or not isinstance(args[1], celtypes.BoolType)): msg = "invalid argument, expected int and bool" raise celpy.CELEvalError(msg) - try: - if version is None: - ip_network(val, strict=bool(strict)) - elif version == 4: - IPv4Network(val, strict=bool(strict)) - elif version == 6: - IPv6Network(val, strict=bool(strict)) - else: - msg = "invalid argument, expected 4 or 6" - raise celpy.CELEvalError(msg) - return celtypes.BoolType(True) - except ValueError: - return celtypes.BoolType(False) + + return celtypes.BoolType(_is_ip_prefix(val, version, strict=strict)) + + +def _is_ip_prefix(string: str, version: int, *, strict=False) -> bool: + """Internal implementation""" + valid = False + if version == 6: + v6 = Ipv6(string) + valid = v6.address_prefix() and (not strict or v6.is_prefix_only()) + elif version == 4: + v4 = Ipv4(string) + valid = v4.address_prefix() and (not strict or v4.is_prefix_only()) + elif version == 0: + valid = _is_ip_prefix(string, 6, strict=strict) or _is_ip_prefix(string, 4, strict=strict) + + return valid def is_email(string: celtypes.Value) -> celpy.Result: @@ -211,11 +189,60 @@ def is_uri_ref(string: celtypes.Value) -> celpy.Result: return celtypes.BoolType(valid) -def is_hostname(string: celtypes.Value) -> celpy.Result: - if not isinstance(string, celtypes.StringType): +def is_hostname(val: celtypes.Value) -> celpy.Result: + if not isinstance(val, celtypes.StringType): msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) - return celtypes.BoolType(_validate_hostname(string)) + return celtypes.BoolType(_is_hostname(val)) + + +def _is_hostname(val: str) -> bool: + if len(val) > 253: + return False + + if val.endswith(val.lower()): + string = val[0 : len(val) - 1] + else: + string = val + + all_digits = False + parts = string.lower().split(sep=".") + + # split hostname on '.' and validate each part + for part in parts: + all_digits = True + + # if part is empty, longer than 63 chars, or starts/ends with '-', it is invalid + part_len = len(part) + + if part_len == 0 or part_len > 63 or part.startswith("-") or part.endswith("-"): + return False + + for c in part: + # if the character is not a-z, 0-9, or '-', it is invalid + if (c < "a" or c > "z") and (c < "0" or c > "9") and c != "-": + return False + + all_digits = all_digits and c >= "0" and c <= "9" + + # the last part cannot be all numbers + return not all_digits + + +def _is_port(val: str) -> bool: + if len(val) == 0: + return False + + for c in val: + if c < "0" or c > "9": + return False + + try: + return int(val) <= 65535 + + except ValueError: + # Error converting to number + return False def is_host_and_port(string: celtypes.Value, port_required: celtypes.Value) -> celpy.Result: @@ -225,7 +252,33 @@ def is_host_and_port(string: celtypes.Value, port_required: celtypes.Value) -> c if not isinstance(port_required, celtypes.BoolType): msg = "invalid argument, expected bool" raise celpy.CELEvalError(msg) - return celtypes.BoolType(validate_host_and_port(string, port_required=bool(port_required))) + return celtypes.BoolType(_is_host_and_port(string, port_required=bool(port_required))) + + +def _is_host_and_port(val: str, *, port_required=False) -> bool: + if len(val) == 0: + return False + + split_idx = val.rfind(":") + if val[0] == "[": + end = val.rfind("]") + end_plus = end + 1 + + if end_plus == len(val): + return not port_required and _is_ip(val[1:end], 6) + elif end_plus == split_idx: + return _is_ip(val[1:end], 6) and _is_port(val[split_idx + 1 :]) + else: + # malformed + return False + + if split_idx < 0: + return not port_required and (_is_hostname(val) or _is_ip(val, 4)) + + host = val[0:split_idx] + port = val[split_idx + 1 :] + + return (_is_hostname(host) or _is_ip(host, 4)) and _is_port(port) def is_nan(val: celtypes.Value) -> celpy.Result: @@ -290,7 +343,7 @@ def address_prefix(self) -> bool: self.__address_part() and self.__take("/") and self.__prefix_length() and self._index == len(self._string) ) - def __get_bits(self) -> int: + def get_bits(self) -> int: """Get the bits of an address parsed through address() or address_prefix() Returns: @@ -301,7 +354,7 @@ def __get_bits(self) -> int: return (self._octets[0] << 24) | (self._octets[1] << 16) | (self._octets[2] << 8) | self._octets[3] - def __is_prefix_only(self) -> bool: + def is_prefix_only(self) -> bool: """Determines TODO Behavior is undefined if address_prefix() has not been called before or has returned false. @@ -309,7 +362,7 @@ def __is_prefix_only(self) -> bool: Returns: True if all bits to the right of the prefix-length are all zeros. False otherwise. """ - bits = self.__get_bits() + bits = self.get_bits() mask: int if self._prefix_len == 32: @@ -479,9 +532,15 @@ def __init__(self, string: str): super().__init__() self._string = string + self._index = 0 + self._pieces = [] self._double_colon_at = -1 + self._double_colon_seen = False + self._dotted_raw = "" + self._dotted_addr = None + self._zone_id_found = False - def __get_bits(self) -> int: + def get_bits(self) -> int: """Get the bits of an address parsed through address() or address_prefix() as a 128-bit integer. Returns: @@ -493,7 +552,7 @@ def __get_bits(self) -> int: # Handle dotted decimal, add to p16 if self._dotted_addr is not None: # Right-most 32 bits - dotted32 = self._dotted_addr.__get_bits() + dotted32 = self._dotted_addr.get_bits() # High 16 bits p16.append(dotted32 >> 16) # Low 16 bits @@ -522,7 +581,7 @@ def __get_bits(self) -> int: | p16[7] ) - def __is_prefix_only(self) -> bool: + def is_prefix_only(self) -> bool: """Determine whether string is an ipv6 prefix only. Behavior is undefined if address_prefix() has not been called before. @@ -530,7 +589,7 @@ def __is_prefix_only(self) -> bool: Returns: True if all bits to the right of the prefix-length are all zeros. False otherwise. """ - bits = self.__get_bits() + bits = self.get_bits() mask: int if self._prefix_len >= 128: mask = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF @@ -550,6 +609,241 @@ def address(self) -> bool: return self.__address_part() and self._index == len(self._string) + def address_prefix(self) -> bool: + """Parse an IPv6 Address Prefix following RFC 4291. Zone id is not permitted.""" + + return ( + self.__address_part() + and not self._zone_id_found + and self.__take("/") + and self.__prefix_length() + and self._index == len(self._string) + ) + + def __prefix_length(self) -> bool: + start = self._index + + while True: + if self._index >= len(self._string) or not self.__digit(): + break + + if self._index - start > 3: + return False + + string = self._string[start : self._index] + + if len(string) == 0: + # too short + return False + + if len(string) > 1 and string[0] == "0": + # bad leading 0 + return False + + try: + value = int(string) + + if value > 128: + # max 128 bits + return False + + self._prefix_len = value + + return True + + except ValueError: + # Error converting to number + return False + + def __address_part(self) -> bool: + """Store the dotted notation for right-most 32 bits in dottedRaw / dottedAddr if found.""" + + while True: + if self._index >= len(self._string): + break + + # dotted notation for right-most 32 bits, e.g. 0:0:0:0:0:ffff:192.1.56.10 + if self._double_colon_seen or (len(self._pieces) == 6 and self.__dotted()): + dotted = Ipv4(self._dotted_raw) + + if dotted.address(): + self._dotted_addr = dotted + return True + + return False + + if self.__h16(): + continue + + if self.__take(":"): + if self.__take(":"): + if self._double_colon_seen: + return False + + self._double_colon_seen = True + self._double_colon_at = len(self._pieces) + + if self.__take(":"): + return False + + continue + + if self._string[self._index] == "%" and not self.__zone_id(): + return False + + break + + return self._double_colon_seen or len(self._pieces) == 8 + + def __zone_id(self) -> bool: + """Determine whether string contains a zoneID. + + Method parses the rule from RFC 6874: + + ZoneID = 1*( unreserved / pct-encoded ) + + There is no definition for the character set allowed in the zone identifier. + RFC 4007 permits basically any non-null string. + """ + + start = self._index + + if self.__take("%"): + if len(self._string) - self._index > 0: + # permit any non-null string + self._index = len(self._string) + self._zone_id_found = True + + return True + + self._index = start + self._zone_id_found = False + + return False + + def __dotted(self) -> bool: + """Determine whether string contains a dotted address. + + Method parses the rule: + + 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT + + Stores match in _dotted_raw. + """ + + start = self._index + self._dotted_raw = "" + + while True: + if self._index < len(self._string) and (self.__digit() or self.__take(".")): + continue + + break + + if self._index - start >= 7: + self._dotted_raw = self._string[start : self._index] + return True + + self._index = start + + return False + + def __h16(self) -> bool: + """Determine whether string contains an h16. + + Method parses the rule: + + h16 = 1*4HEXDIG + + Stores 16-bit value in _pieces. + """ + + start = self._index + + while True: + if self._index >= len(self._string) or not self.__hex_dig(): + break + + string = self._string[start : self._index] + + if len(string) == 0: + # too short + return False + + if len(string) > 4: + # too long + return False + + try: + value = int(string, 16) + + self._pieces.append(value) + + return True + + except ValueError: + # Error converting to number + return False + + return True + + def __hex_dig(self) -> bool: + """Report whether the current position is a hex digit. + + Method parses the rule: + + HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" + """ + + if self._index >= len(self._string): + return False + + c = self._string[self._index] + + if ("0" <= c <= "9") or ("a" <= c <= "f") or ("A" <= c <= "F") or ("0" <= c <= "9"): + self._index += 1 + + return True + + return False + + def __digit(self) -> bool: + """Report whether the current position is a digit. + + Method parses the rule: + + DIGIT = %x30-39 ; 0-9 + """ + + if self._index >= len(self._string): + return False + + c = self._string[self._index] + if "0" <= c <= "9": + self._index += 1 + return True + + return False + + def __take(self, char: str) -> bool: + """Take the given char at the current index. + + If char is at the current index, increment the index. + + Returns: + True if char is at the current index. False if char is not at the + current index or the end of string has been reached. + """ + + if self._index >= len(self._string): + return False + + if self._string[self._index] == char: + self._index += 1 + return True + + return False + class Uri: """Uri is a class used to parse a given string to determine if it is a valid URI or URI reference. @@ -1326,7 +1620,7 @@ def make_extra_funcs(locale: str) -> dict[str, celpy.CELFunction]: # protovalidate specific functions "isNan": is_nan, "isInf": is_inf, - "isIp": validate_ip, + "isIp": is_ip, "isIpPrefix": is_ip_prefix, "isEmail": is_email, "isUri": is_uri, From 9429db74e1e417f343fc51eb1998851d470dc17a Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Thu, 27 Mar 2025 10:48:20 -0400 Subject: [PATCH 09/11] Docs --- protovalidate/internal/extra_func.py | 169 ++++++++++++++------------- tests/conformance/nonconforming.yaml | 49 -------- 2 files changed, 87 insertions(+), 131 deletions(-) diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index a40ec589..08a803d7 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -29,7 +29,9 @@ def is_ip(val: celtypes.Value, ver: typing.Optional[celtypes.Value] = None) -> celpy.Result: - """Validate whether a given string is a valid IP address according to an optional IP version. + """Return True if the string is an IPv4 or IPv6 address, optionally limited to a specific version. + + Version 0 or None means either 4 or 6. Passing a version other than 0, 4, or 6 always returns False. IPv4 addresses are expected in the dotted decimal format, for example "192.168.5.21". IPv6 addresses are expected in their text representation, for example "::1" or "2001:0DB8:ABCD:0012::0". @@ -37,20 +39,7 @@ def is_ip(val: celtypes.Value, ver: typing.Optional[celtypes.Value] = None) -> c Both formats are well-defined in the internet standard RFC 3986. Zone identifiers for IPv6 addresses (for example "fe80::a%en1") are supported. - Args: - val (celTypes.Value): The string to validate. - version (typing.Optional[celtypes.Value]): An optional version to use for validating the IP address. - Passing None for a version of 0 means either 4 or 6. - Passing a version other than 0, 4, or 6 always returns False. - - Returns: - True if the string is an IPv4 or IPv6 address, optionally limited to a specific version. - - Raises: - celpy.CELEvalError: If val is not an instance of celtypes.StringType or - if version is not an instance of celtypes.IntType and is not None. """ - if not isinstance(val, celtypes.StringType): msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) @@ -80,6 +69,25 @@ def _is_ip(string: str, version: int) -> bool: def is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result: + """Return True if the string is a valid IP with prefix length, optionally + limited to a specific version (v4 or v6), and optionally requiring the host + portion to be all zeros. + + An address prefix divides an IP address into a network portion, and a host portion. + The prefix length specifies how many bits the network portion has. + For example, the IPv6 prefix "2001:db8:abcd:0012::0/64" designates the + left-most 64 bits as the network prefix. The range of the network is 2**64 + addresses, from 2001:db8:abcd:0012::0 to 2001:db8:abcd:0012:ffff:ffff:ffff:ffff. + + An address prefix may include a specific host address, for example + "2001:db8:abcd:0012::1f/64". With strict = true, this is not permitted. The + host portion must be all zeros, as in "2001:db8:abcd:0012::0/64". + + The same principle applies to IPv4 addresses. "192.168.1.0/24" designates + the first 24 bits of the 32-bit IPv4 as the network prefix. + + """ + if not isinstance(val, celtypes.StringType): msg = "invalid argument, expected string or bytes" raise celpy.CELEvalError(msg) @@ -118,7 +126,7 @@ def _is_ip_prefix(string: str, version: int, *, strict=False) -> bool: def is_email(string: celtypes.Value) -> celpy.Result: - """Return true if the string is an email address, for example "foo@example.com". + """Return True if the string is an email address, for example "foo@example.com". Conforms to the definition for a valid email address from the HTML standard. Note that this standard willfully deviates from RFC 5322, which allows many @@ -134,7 +142,7 @@ def is_email(string: celtypes.Value) -> celpy.Result: def is_uri(string: celtypes.Value) -> celpy.Result: - """Return true if the string is a URI, for example "https://example.com/foo/bar?baz=quux#frag". + """Return True if the string is a URI, for example "https://example.com/foo/bar?baz=quux#frag". URI is defined in the internet standard RFC 3986. Zone Identifiers in IPv6 address literals are supported (RFC 6874). @@ -148,7 +156,7 @@ def is_uri(string: celtypes.Value) -> celpy.Result: def is_uri_ref(string: celtypes.Value) -> celpy.Result: - """Return true if the string is a URI Reference - a URI such as "https://example.com/foo/bar?baz=quux#frag" or + """Return True if the string is a URI Reference - a URI such as "https://example.com/foo/bar?baz=quux#frag" or a Relative Reference such as "./foo/bar?query". URI, URI Reference, and Relative Reference are defined in the internet standard RFC 3986. @@ -163,6 +171,17 @@ def is_uri_ref(string: celtypes.Value) -> celpy.Result: def is_hostname(val: celtypes.Value) -> celpy.Result: + """Returns True if the string is a valid hostname, for example "foo.example.com". + + A valid hostname follows the rules below: + - The name consists of one or more labels, separated by a dot ("."). + - Each label can be 1 to 63 alphanumeric characters. + - A label can contain hyphens ("-"), but must not start or end with a hyphen. + - The right-most label must not be digits only. + - The name can have a trailing dot, for example "foo.example.com.". + - The name can be 253 characters at most, excluding the optional trailing dot. + + """ if not isinstance(val, celtypes.StringType): msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) @@ -170,6 +189,7 @@ def is_hostname(val: celtypes.Value) -> celpy.Result: def _is_hostname(val: str) -> bool: + """Internal implementation""" if len(val) > 253: return False @@ -219,6 +239,18 @@ def _is_port(val: str) -> bool: def is_host_and_port(string: celtypes.Value, port_required: celtypes.Value) -> celpy.Result: + """Return True if the string is a valid host/port pair, for example "example.com:8080". + + If the argument `port_required` is True, the port is required. If the argument + is False, the port is optional. + + The host can be one of: + - An IPv4 address in dotted decimal format, for example "192.168.0.1". + - An IPv6 address enclosed in square brackets, for example "[::1]". + - A hostname, for example "example.com". + + The port is separated by a colon. It must be non-empty, with a decimal number in the range of 0-65535, inclusive. + """ if not isinstance(string, celtypes.StringType): msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) @@ -287,7 +319,7 @@ def unique(val: celtypes.Value) -> celpy.Result: class Ipv4: - """a class""" + """Ipv4 is a class used to parse a given string to determine if it is a valid IPv4 address or address prefix.""" _string: str _index: int @@ -295,10 +327,7 @@ class Ipv4: _prefix_len: int def __init__(self, string: str): - """ipv4 - - Args: - """ + """Initialize an Ipv4 validation class with a given string.""" super().__init__() self._string = string @@ -317,23 +346,21 @@ def address_prefix(self) -> bool: ) def get_bits(self) -> int: - """Get the bits of an address parsed through address() or address_prefix() + """Return the 32-bit value of an address parsed through address() or address_prefix(). + + Return -1 if no address was parsed successfully. - Returns: - The 32-bit value if address was parsed successfully. 0 if not successful. """ if len(self._octets) != 4: - return 0 + return -1 return (self._octets[0] << 24) | (self._octets[1] << 16) | (self._octets[2] << 8) | self._octets[3] def is_prefix_only(self) -> bool: - """Determines TODO + """Return True if all bits to the right of the prefix-length are all zeros. - Behavior is undefined if address_prefix() has not been called before or has returned false. + Behavior is undefined if address_prefix() has not been called before, or has returned False. - Returns: - True if all bits to the right of the prefix-length are all zeros. False otherwise. """ bits = self.get_bits() @@ -348,6 +375,8 @@ def is_prefix_only(self) -> bool: return bits == masked def __prefix_length(self) -> bool: + """Store value in `prefix_len`""" + start = self._index while True: @@ -436,11 +465,12 @@ def __dec_octet(self) -> bool: return False def __digit(self) -> bool: - """Reports whether the current position is a digit. + """Report whether the current position is a digit. Method parses the rule: - DIGIT = %x30-39 ; 0-9 + DIGIT = %x30-39 ; 0-9 + """ if self._index >= len(self._string): @@ -458,11 +488,7 @@ def __take(self, char: str) -> bool: If char is at the current index, increment the index. - Returns: - True if char is at the current index. False if char is not at the - current index or the end of string has been reached. """ - if self._index >= len(self._string): return False @@ -474,34 +500,20 @@ def __take(self, char: str) -> bool: class Ipv6: - """a class""" + """Ipv6 is a class used to parse a given string to determine if it is a valid IPv6 address or address prefix.""" _string: str _index: int - _pieces: list[int] - _double_colon_at: int + _pieces: list[int] # 16-bit pieces found + _double_colon_at: int # Number of 16-bit pieces found when double colon was found. _double_colon_seen: bool - _dotted_raw: str - _dotted_addr: typing.Optional[Ipv4] + _dotted_raw: str # Dotted notation for right-most 32 bits. + _dotted_addr: typing.Optional[Ipv4] # Dotted notation successfully parsed as Ipv4. _zone_id_found: bool - _prefix_len: int + _prefix_len: int # 0 -128 def __init__(self, string: str): - """ipv6 - - Args: - - Attributes: - _string (str): The string to parse. - _index (int): The index. - _pieces (list[int]): 16-bit pieces found. - _double_colon_at (bool): Number of 16-bit pieces found when double colon was found. - _double_colon_seen (bool): Whether a double colon has been seen in string. - _dotted_raw (str): Dotted notation for right-most 32 bits. - _dotted_addr (typing.Optional[Ipv4]): Dotted notation successfully parsed as Ipv4. - _zone_id_found (bool): Whether a zone ID has been found in string. - _prefix_len (int): 0 - 128 - """ + """Initialize a URI validation class with a given string.""" super().__init__() self._string = string @@ -514,12 +526,11 @@ def __init__(self, string: str): self._zone_id_found = False def get_bits(self) -> int: - """Get the bits of an address parsed through address() or address_prefix() as a 128-bit integer. + """Return the 128-bit value of an address parsed through address() or address_prefix(). - Returns: - The 128-bit value if address was parsed successfully. 0 if no address was parsed successfully. - """ + Return 0 if no address was parsed successfully. + """ p16 = self._pieces # Handle dotted decimal, add to p16 @@ -555,12 +566,10 @@ def get_bits(self) -> int: ) def is_prefix_only(self) -> bool: - """Determine whether string is an ipv6 prefix only. + """Return True if all bits to the right of the prefix-length are all zeros. - Behavior is undefined if address_prefix() has not been called before. + Behavior is undefined if address_prefix() has not been called before, or has returned False. - Returns: - True if all bits to the right of the prefix-length are all zeros. False otherwise. """ bits = self.get_bits() mask: int @@ -594,6 +603,7 @@ def address_prefix(self) -> bool: ) def __prefix_length(self) -> bool: + """Store value in `prefix_len`.""" start = self._index while True: @@ -629,7 +639,7 @@ def __prefix_length(self) -> bool: return False def __address_part(self) -> bool: - """Store the dotted notation for right-most 32 bits in dottedRaw / dottedAddr if found.""" + """Store dotted notation for right-most 32 bits in dotted_raw / dotted_addr if found.""" while True: if self._index >= len(self._string): @@ -671,14 +681,12 @@ def __address_part(self) -> bool: def __zone_id(self) -> bool: """Determine whether string contains a zoneID. - Method parses the rule from RFC 6874: + There is no definition for the character set allowed in the zone + identifier. RFC 4007 permits basically any non-null string. - ZoneID = 1*( unreserved / pct-encoded ) + RFC 6874: ZoneID = 1*( unreserved / pct-encoded ) - There is no definition for the character set allowed in the zone identifier. - RFC 4007 permits basically any non-null string. """ - start = self._index if self.__take("%"): @@ -699,7 +707,7 @@ def __dotted(self) -> bool: Method parses the rule: - 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT + 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT Stores match in _dotted_raw. """ @@ -726,7 +734,7 @@ def __h16(self) -> bool: Method parses the rule: - h16 = 1*4HEXDIG + h16 = 1*4HEXDIG Stores 16-bit value in _pieces. """ @@ -765,9 +773,9 @@ def __hex_dig(self) -> bool: Method parses the rule: - HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" - """ + HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" + """ if self._index >= len(self._string): return False @@ -785,9 +793,9 @@ def __digit(self) -> bool: Method parses the rule: - DIGIT = %x30-39 ; 0-9 - """ + DIGIT = %x30-39 ; 0-9 + """ if self._index >= len(self._string): return False @@ -803,11 +811,7 @@ def __take(self, char: str) -> bool: If char is at the current index, increment the index. - Returns: - True if char is at the current index. False if char is not at the - current index or the end of string has been reached. """ - if self._index >= len(self._string): return False @@ -1441,7 +1445,7 @@ def __pct_encoded(self) -> bool: pct-encoded = "%" HEXDIG HEXDIG - Sets `_pct_encoded_found` to true if a valid triplet was found + Sets `_pct_encoded_found` to True if a valid triplet was found """ start = self._index @@ -1554,6 +1558,7 @@ def __take(self, char: str) -> bool: """Take the given char at the current index. If char is at the current index, increment the index. + """ if self._index >= len(self._string): return False diff --git a/tests/conformance/nonconforming.yaml b/tests/conformance/nonconforming.yaml index 7ac7fe88..7d49fe47 100644 --- a/tests/conformance/nonconforming.yaml +++ b/tests/conformance/nonconforming.yaml @@ -26,50 +26,11 @@ library/is_host_and_port: # want: validation error (1 violation) # 1. constraint_id: "library.is_host_and_port" # got: valid -library/is_ip: - - version/0/valid/ipv4 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{val:"127.0.0.1" version:0} - # want: valid - # got: runtime error: invalid argument, expected 4 or 6 - - version/0/valid/ipv6 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{val:"::1" version:0} - # want: valid - # got: runtime error: invalid argument, expected 4 or 6 - version/1/invalid/empty_string # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{version:1} # want: validation error (1 violation) # 1. constraint_id: "library.is_ip" # got: runtime error: invalid argument, expected 4 or 6 - - version/1/invalid/ipv4 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{val:"127.0.0.1" version:1} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip" - # got: runtime error: invalid argument, expected 4 or 6 - - version/1/invalid/ipv6 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{val:"::1" version:1} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip" - # got: runtime error: invalid argument, expected 4 or 6 - - version/5/invalid/ipv4 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{val:"127.0.0.1" version:5} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip" - # got: runtime error: invalid argument, expected 4 or 6 - - version/5/invalid/ipv6 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{val:"::1" version:5} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip" - # got: runtime error: invalid argument, expected 4 or 6 - - version/7/invalid/ipv4 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{val:"127.0.0.1" version:7} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip" - # got: runtime error: invalid argument, expected 4 or 6 - - version/7/invalid/ipv6 - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{val:"::1" version:7} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip" - # got: runtime error: invalid argument, expected 4 or 6 - version/omitted/valid/ipv6_zone-id_any_non_null_character # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{val:"::1%% :x\x1f"} # want: valid @@ -106,21 +67,11 @@ library/is_ip_prefix: # want: validation error (1 violation) # 1. constraint_id: "library.is_ip_prefix" # got: valid - - version/omitted/strict/omitted/invalid/ipv4_missing_prefix - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{val:"192.168.1.0"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip_prefix" - # got: valid - version/omitted/strict/omitted/invalid/ipv6_bad_leading_zero_in_prefix-length # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{val:"2001:0DB8:ABCD:0012:FFFF:FFFF:FFFF:FFFF/024"} # want: validation error (1 violation) # 1. constraint_id: "library.is_ip_prefix" # got: valid - - version/omitted/strict/omitted/invalid/ipv6_missing_prefix - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{val:"2001:0DB8:ABCD:0012:FFFF:FFFF:FFFF:FFFF"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip_prefix" - # got: valid - version/omitted/strict/omitted/invalid/ipv6_zone-id/a # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{val:"::1%en1/64"} # want: validation error (1 violation) From 67055d448c5e444abd8d5a6d258fa50ecf3b48a1 Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Thu, 27 Mar 2025 13:46:38 -0400 Subject: [PATCH 10/11] Final --- protovalidate/internal/extra_func.py | 9 +-- tests/conformance/nonconforming.yaml | 88 +++------------------------- tests/extra_func_test.py | 28 --------- 3 files changed, 12 insertions(+), 113 deletions(-) delete mode 100644 tests/extra_func_test.py diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index 08a803d7..c9cb009f 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -193,10 +193,10 @@ def _is_hostname(val: str) -> bool: if len(val) > 253: return False - if val.endswith(val.lower()): - string = val[0 : len(val) - 1] + if val.endswith("."): + string = val[0 : len(val) - 1].lower() else: - string = val + string = val.lower() all_digits = False parts = string.lower().split(sep=".") @@ -265,6 +265,7 @@ def _is_host_and_port(val: str, *, port_required=False) -> bool: return False split_idx = val.rfind(":") + if val[0] == "[": end = val.rfind("]") end_plus = end + 1 @@ -646,7 +647,7 @@ def __address_part(self) -> bool: break # dotted notation for right-most 32 bits, e.g. 0:0:0:0:0:ffff:192.1.56.10 - if self._double_colon_seen or (len(self._pieces) == 6 and self.__dotted()): + if (self._double_colon_seen or len(self._pieces) == 6) and self.__dotted(): dotted = Ipv4(self._dotted_raw) if dotted.address(): diff --git a/tests/conformance/nonconforming.yaml b/tests/conformance/nonconforming.yaml index 7d49fe47..349f4e71 100644 --- a/tests/conformance/nonconforming.yaml +++ b/tests/conformance/nonconforming.yaml @@ -8,87 +8,13 @@ standard_constraints/well_known_types/timestamp: - gte_lte/invalid/above - lte/invalid -library/is_host_and_port: - - port_required/false/invalid/port_number_sign - # input: [type.googleapis.com/buf.validate.conformance.cases.IsHostAndPort]:{val:"example.com:+0"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_host_and_port" - # got: valid - - port_required/false/valid/ipv6_zone-id_any_non_null_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsHostAndPort]:{val:"[::1%% :x\x1f]"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_host_and_port" - # message: "" - # for_key: false - - port_required/true/invalid/port_number_sign - # input: [type.googleapis.com/buf.validate.conformance.cases.IsHostAndPort]:{val:"example.com:+0" port_required:true} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_host_and_port" - # got: valid - - version/1/invalid/empty_string - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{version:1} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip" - # got: runtime error: invalid argument, expected 4 or 6 - - version/omitted/valid/ipv6_zone-id_any_non_null_character - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIp]:{val:"::1%% :x\x1f"} - # want: valid - # got: validation error (1 violation) - # 1. constraint_id: "library.is_ip" - # message: "" - # for_key: false -library/is_ip_prefix: - - version/0/strict/omitted/valid/ipv4_prefix - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{val:"127.0.0.1/16" version:0} - # want: valid - # got: runtime error: invalid argument, expected 4 or 6 - - version/0/strict/omitted/valid/ipv6_prefix - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{val:"::1/64" version:0} - # want: valid - # got: runtime error: invalid argument, expected 4 or 6 - - version/1/strict/omitted/invalid/empty_string - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{version:1} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip_prefix" - # got: runtime error: invalid argument, expected 4 or 6 - - version/5/strict/omitted/invalid/ipv6_prefix - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{val:"::1/64" version:5} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip_prefix" - # got: runtime error: invalid argument, expected 4 or 6 - - version/7/strict/omitted/invalid/ipv6_prefix - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{val:"::1/64" version:7} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip_prefix" - # got: runtime error: invalid argument, expected 4 or 6 - - version/omitted/strict/omitted/invalid/ipv4_bad_leading_zero_in_prefix-length - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{val:"192.168.1.0/024"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip_prefix" - # got: valid - - version/omitted/strict/omitted/invalid/ipv6_bad_leading_zero_in_prefix-length - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{val:"2001:0DB8:ABCD:0012:FFFF:FFFF:FFFF:FFFF/024"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip_prefix" - # got: valid - - version/omitted/strict/omitted/invalid/ipv6_zone-id/a - # input: [type.googleapis.com/buf.validate.conformance.cases.IsIpPrefix]:{val:"::1%en1/64"} - # want: validation error (1 violation) - # 1. constraint_id: "library.is_ip_prefix" - # got: valid standard_constraints/required: + # The below tests are failing due to a bug in the conformance runner. These + # proto messages are marked as IGNORE_ALWAYS which means they should always + # pass, which this implementation does correctly. However, the runner is + # expecting them to fail. + # See: + # https://github.com/bufbuild/protovalidate/blob/main/proto/protovalidate-testing/buf/validate/conformance/cases/required_field_proto2.proto#L24 + # https://github.com/bufbuild/protovalidate/blob/main/proto/protovalidate-testing/buf/validate/conformance/cases/required_field_proto2.proto#L31 - proto2/scalar/optional/unset - # input: [type.googleapis.com/buf.validate.conformance.cases.RequiredProto2ScalarOptional]:{} - # want: validation error (1 violation) - # 1. constraint_id: "required" - # field: "val" elements:{field_number:1 field_name:"val" field_type:TYPE_STRING} - # rule: "required" elements:{field_number:25 field_name:"required" field_type:TYPE_BOOL} - # got: valid - proto2/scalar/optional_with_default/unset - # input: [type.googleapis.com/buf.validate.conformance.cases.RequiredProto2ScalarOptionalDefault]:{} - # want: validation error (1 violation) - # 1. constraint_id: "required" - # field: "val" elements:{field_number:1 field_name:"val" field_type:TYPE_STRING} - # rule: "required" elements:{field_number:25 field_name:"required" field_type:TYPE_BOOL} - # got: valid diff --git a/tests/extra_func_test.py b/tests/extra_func_test.py deleted file mode 100644 index 735ffeb9..00000000 --- a/tests/extra_func_test.py +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright 2023-2025 Buf Technologies, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import unittest - -from protovalidate.internal.extra_func import Ipv4, Uri - - -class TestFunc(unittest.TestCase): - def test_ninf(self): - uri = Uri("https://foo%c3x%96") - is_it = uri.uri() - self.assertFalse(is_it) - - def test_is_ip(self): - v4 = Ipv4("127.0.0.1").address() - self.assertTrue(v4) From e09386c9b15020deb511ef1c154087b7796e1a37 Mon Sep 17 00:00:00 2001 From: Steve Ayers Date: Fri, 28 Mar 2025 11:08:51 -0400 Subject: [PATCH 11/11] Update function names --- protovalidate/internal/extra_func.py | 42 +++++++++++++--------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index c9cb009f..54f825a2 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -28,7 +28,7 @@ ) -def is_ip(val: celtypes.Value, ver: typing.Optional[celtypes.Value] = None) -> celpy.Result: +def cel_is_ip(val: celtypes.Value, ver: typing.Optional[celtypes.Value] = None) -> celpy.Result: """Return True if the string is an IPv4 or IPv6 address, optionally limited to a specific version. Version 0 or None means either 4 or 6. Passing a version other than 0, 4, or 6 always returns False. @@ -68,7 +68,7 @@ def _is_ip(string: str, version: int) -> bool: return valid -def is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result: +def cel_is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result: """Return True if the string is a valid IP with prefix length, optionally limited to a specific version (v4 or v6), and optionally requiring the host portion to be all zeros. @@ -125,7 +125,7 @@ def _is_ip_prefix(string: str, version: int, *, strict=False) -> bool: return valid -def is_email(string: celtypes.Value) -> celpy.Result: +def cel_is_email(string: celtypes.Value) -> celpy.Result: """Return True if the string is an email address, for example "foo@example.com". Conforms to the definition for a valid email address from the HTML standard. @@ -141,7 +141,7 @@ def is_email(string: celtypes.Value) -> celpy.Result: return celtypes.BoolType(m) -def is_uri(string: celtypes.Value) -> celpy.Result: +def cel_is_uri(string: celtypes.Value) -> celpy.Result: """Return True if the string is a URI, for example "https://example.com/foo/bar?baz=quux#frag". URI is defined in the internet standard RFC 3986. @@ -155,7 +155,7 @@ def is_uri(string: celtypes.Value) -> celpy.Result: return celtypes.BoolType(valid) -def is_uri_ref(string: celtypes.Value) -> celpy.Result: +def cel_is_uri_ref(string: celtypes.Value) -> celpy.Result: """Return True if the string is a URI Reference - a URI such as "https://example.com/foo/bar?baz=quux#frag" or a Relative Reference such as "./foo/bar?query". @@ -170,7 +170,7 @@ def is_uri_ref(string: celtypes.Value) -> celpy.Result: return celtypes.BoolType(valid) -def is_hostname(val: celtypes.Value) -> celpy.Result: +def cel_is_hostname(val: celtypes.Value) -> celpy.Result: """Returns True if the string is a valid hostname, for example "foo.example.com". A valid hostname follows the rules below: @@ -238,7 +238,7 @@ def _is_port(val: str) -> bool: return False -def is_host_and_port(string: celtypes.Value, port_required: celtypes.Value) -> celpy.Result: +def cel_is_host_and_port(string: celtypes.Value, port_required: celtypes.Value) -> celpy.Result: """Return True if the string is a valid host/port pair, for example "example.com:8080". If the argument `port_required` is True, the port is required. If the argument @@ -287,14 +287,14 @@ def _is_host_and_port(val: str, *, port_required=False) -> bool: return (_is_hostname(host) or _is_ip(host, 4)) and _is_port(port) -def is_nan(val: celtypes.Value) -> celpy.Result: +def cel_is_nan(val: celtypes.Value) -> celpy.Result: if not isinstance(val, celtypes.DoubleType): msg = "invalid argument, expected double" raise celpy.CELEvalError(msg) return celtypes.BoolType(math.isnan(val)) -def is_inf(val: celtypes.Value, sign: typing.Optional[celtypes.Value] = None) -> celpy.Result: +def cel_is_inf(val: celtypes.Value, sign: typing.Optional[celtypes.Value] = None) -> celpy.Result: if not isinstance(val, celtypes.DoubleType): msg = "invalid argument, expected double" raise celpy.CELEvalError(msg) @@ -312,7 +312,7 @@ def is_inf(val: celtypes.Value, sign: typing.Optional[celtypes.Value] = None) -> return celtypes.BoolType(math.isinf(val)) -def unique(val: celtypes.Value) -> celpy.Result: +def cel_unique(val: celtypes.Value) -> celpy.Result: if not isinstance(val, celtypes.ListType): msg = "invalid argument, expected list" raise celpy.CELEvalError(msg) @@ -767,8 +767,6 @@ def __h16(self) -> bool: # Error converting to number return False - return True - def __hex_dig(self) -> bool: """Report whether the current position is a hex digit. @@ -1579,16 +1577,16 @@ def make_extra_funcs(locale: str) -> dict[str, celpy.CELFunction]: # Missing standard functions "format": string_fmt.format, # protovalidate specific functions - "isNan": is_nan, - "isInf": is_inf, - "isIp": is_ip, - "isIpPrefix": is_ip_prefix, - "isEmail": is_email, - "isUri": is_uri, - "isUriRef": is_uri_ref, - "isHostname": is_hostname, - "isHostAndPort": is_host_and_port, - "unique": unique, + "isNan": cel_is_nan, + "isInf": cel_is_inf, + "isIp": cel_is_ip, + "isIpPrefix": cel_is_ip_prefix, + "isEmail": cel_is_email, + "isUri": cel_is_uri, + "isUriRef": cel_is_uri_ref, + "isHostname": cel_is_hostname, + "isHostAndPort": cel_is_host_and_port, + "unique": cel_unique, }