diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py index 09ba69e3..3ad72b1c 100644 --- a/protovalidate/internal/extra_func.py +++ b/protovalidate/internal/extra_func.py @@ -145,23 +145,14 @@ def is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result: def is_email(string: celtypes.Value) -> celpy.Result: - """Validate whether string is a valid email address. + """Return true if the string is an email address, for example "foo@example.com". Conforms to the definition for a valid email address from the HTML standard. Note that this standard willfully deviates from RFC 5322, which allows many unexpected forms of email addresses and will easily match a typographical error. - Args: - string (celTypes.Value): The string to validate. - - Returns: - True if the string is an email address, for example "foo@example.com". False otherwise. - - Raises: - celpy.CELEvalError: If string is not an instance of celtypes.StringType. """ - if not isinstance(string, celtypes.StringType): msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) @@ -170,21 +161,12 @@ def is_email(string: celtypes.Value) -> celpy.Result: def is_uri(string: celtypes.Value) -> celpy.Result: - """Validate whether string is a valid URI. + """Return true if the string is a URI, for example "https://example.com/foo/bar?baz=quux#frag". URI is defined in the internet standard RFC 3986. Zone Identifiers in IPv6 address literals are supported (RFC 6874). - Args: - string (celTypes.Value): The string to validate. - - Returns: - True if the string is a URI, for example "https://example.com/foo/bar?baz=quux#frag". False otherwise. - - Raises: - celpy.CELEvalError: If string is not an instance of celtypes.StringType. """ - if not isinstance(string, celtypes.StringType): msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) @@ -193,22 +175,13 @@ def is_uri(string: celtypes.Value) -> celpy.Result: def is_uri_ref(string: celtypes.Value) -> celpy.Result: - """Validate whether string is a valid URI reference. + """Return true if the string is a URI Reference - a URI such as "https://example.com/foo/bar?baz=quux#frag" or + a Relative Reference such as "./foo/bar?query". URI, URI Reference, and Relative Reference are defined in the internet standard RFC 3986. Zone Identifiers in IPv6 address literals are supported (RFC 6874). - Args: - string (celTypes.Value): The string to validate. - - Returns: - True if the string is a URI Reference - a URI such as "https://example.com/foo/bar?baz=quux#frag" - or a Relative Reference such as "./foo/bar?query". False otherwise. - - Raises: - celpy.CELEvalError: If string is not an instance of celtypes.StringType. """ - if not isinstance(string, celtypes.StringType): msg = "invalid argument, expected string" raise celpy.CELEvalError(msg) @@ -266,27 +239,14 @@ def unique(val: celtypes.Value) -> celpy.Result: class Uri: - """Uri is a class used to parse a given string to determine if it is a valid URI or URI reference. - - Callers can validate a string by constructing an instance of this class and then calling one of its - public methods: - uri() - uri_reference() - - Each method will return True or False depending on whether it passes validation. - """ + """Uri is a class used to parse a given string to determine if it is a valid URI or URI reference.""" _string: str _index: int _pct_encoded_found: bool def __init__(self, string: str): - """Initialize a URI validation class with a given string - - Args: - string (str): String to validate as a URI or URI reference. - """ - + """Initialize a URI validation class with a given string.""" super().__init__() self._string = string self._index = 0 @@ -296,9 +256,9 @@ def uri(self) -> bool: Method parses the rule: - URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] - """ + URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + """ start = self._index if not (self.__scheme() and self.__take(":") and self.__hier_part()): self._index = start @@ -321,9 +281,9 @@ def uri_reference(self) -> bool: Method parses the rule: - URI-reference = URI / relative-ref - """ + URI-reference = URI / relative-ref + """ return self.uri() or self.__relative_ref() def __hier_part(self) -> bool: @@ -331,12 +291,12 @@ def __hier_part(self) -> bool: Method parses the rule: - hier-part = "//" authority path-abempty. - / path-absolute - / path-rootless - / path-empty - """ + hier-part = "//" authority path-abempty. + / path-absolute + / path-rootless + / path-empty + """ start = self._index if self.__take("/") and self.__take("/") and self.__authority() and self.__path_abempty(): return True @@ -350,9 +310,9 @@ def __relative_ref(self) -> bool: Method parses the rule: - relative-ref = relative-part [ "?" query ] [ "#" fragment ] - """ + relative-ref = relative-part [ "?" query ] [ "#" fragment ] + """ start = self._index if not self.__relative_part(): return False @@ -376,12 +336,12 @@ def __relative_part(self) -> bool: Method parses the rule: - relative-part = "//" authority path-abempty - / path-absolute - / path-noscheme - / path-empty - """ + relative-part = "//" authority path-abempty + / path-absolute + / path-noscheme + / path-empty + """ start = self._index if self.__take("/") and self.__take("/") and self.__authority() and self.__path_abempty(): return True @@ -395,11 +355,11 @@ def __scheme(self) -> bool: Method parses the rule: - scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) Terminated by ":". - """ + """ start = self._index if self.__alpha(): while self.__alpha() or self.__digit() or self.__take("+") or self.__take("-") or self.__take("."): @@ -416,11 +376,11 @@ def __authority(self) -> bool: Method parses the rule: - authority = [ userinfo "@" ] host [ ":" port ] + authority = [ userinfo "@" ] host [ ":" port ] Lead by double slash ("") and terminated by "/", "?", "#", or end of URI. - """ + """ start = self._index if self.__userinfo(): if not self.__take("@"): @@ -448,8 +408,8 @@ def __is_authority_end(self) -> bool: The authority component [...] is terminated by the next slash ("/"), question mark ("?"), or number sign ("#") character, or by the end of the URI. - """ + """ return ( self._index >= len(self._string) or self._string[self._index] == "?" @@ -462,11 +422,11 @@ def __userinfo(self) -> bool: Method parses the rule: - userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) Terminated by "@" in authority. - """ + """ start = self._index while True: if self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take(":"): @@ -480,7 +440,7 @@ def __userinfo(self) -> bool: return False def __check_host_pct_encoded(self, string: str) -> bool: - """Verify that string is correctly percent-encoded""" + """Verify that string is correctly percent-encoded.""" try: # unquote defaults to 'UTF-8' encoding. urlparse.unquote(string, errors="strict") @@ -494,9 +454,9 @@ def __host(self) -> bool: Method parses the rule: - host = IP-literal / IPv4address / reg-name. - """ + host = IP-literal / IPv4address / reg-name. + """ if self._index >= len(self._string): return False @@ -522,11 +482,11 @@ def __port(self) -> bool: Method parses the rule: - port = *DIGIT + port = *DIGIT Terminated by end of authority. - """ + """ start = self._index while True: if self.__digit(): @@ -543,9 +503,9 @@ def __ip_literal(self) -> bool: Method parses the rule from RFC 6874: - IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture ) "]" - """ + IP-literal = "[" ( IPv6address / IPv6addrz / IPvFuture ) "]" + """ start = self._index if self.__take("["): @@ -572,8 +532,8 @@ def __ipv6_address(self) -> bool: Method parses the rule "IPv6address". Relies on the implementation of validate_ip. - """ + """ start = self._index while self.__hex_dig() or self.__take(":"): pass @@ -589,9 +549,9 @@ def __ipv6_addrz(self) -> bool: Method parses the rule from RFC 6874: - IPv6addrz = IPv6address "%25" ZoneID - """ + IPv6addrz = IPv6address "%25" ZoneID + """ start = self._index if self.__ipv6_address() and self.__take("%") and self.__take("2") and self.__take("5") and self.__zone_id(): return True @@ -605,9 +565,9 @@ def __zone_id(self) -> bool: Method parses the rule from RFC 6874: - ZoneID = 1*( unreserved / pct-encoded ) - """ + ZoneID = 1*( unreserved / pct-encoded ) + """ start = self._index while self.__unreserved() or self.__pct_encoded(): pass @@ -624,9 +584,9 @@ def __ip_vfuture(self) -> bool: Method parses the rule: - IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) - """ + IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + """ start = self._index if self.__take("v") and self.__hex_dig(): @@ -650,11 +610,11 @@ def __reg_name(self) -> bool: Method parses the rule: - reg-name = *( unreserved / pct-encoded / sub-delims ) + reg-name = *( unreserved / pct-encoded / sub-delims ) Terminates on start of port (":") or end of authority. - """ + """ start = self._index while True: if self.__unreserved() or self.__pct_encoded() or self.__sub_delims(): @@ -676,8 +636,8 @@ def __is_path_end(self) -> bool: > The path is terminated by the first question mark ("?") or > number sign ("#") character, or by the end of the URI. - """ + """ return self._index >= len(self._string) or self._string[self._index] == "?" or self._string[self._index] == "#" def __path_abempty(self) -> bool: @@ -685,11 +645,11 @@ def __path_abempty(self) -> bool: Method parses the rule: - path-abempty = *( "/" segment ) + path-abempty = *( "/" segment ) Terminated by end of path: "?", "#", or end of URI. - """ + """ start = self._index while self.__take("/") and self.__segment(): pass @@ -706,11 +666,11 @@ def __path_absolute(self) -> bool: Method parses the rule: - path-absolute = "/" [ segment-nz *( "/" segment ) ] + path-absolute = "/" [ segment-nz *( "/" segment ) ] Terminated by end of path: "?", "#", or end of URI. - """ + """ start = self._index if self.__take("/"): @@ -730,11 +690,11 @@ def __path_noscheme(self) -> bool: Method parses the rule: - path-noscheme = segment-nz-nc *( "/" segment ) + path-noscheme = segment-nz-nc *( "/" segment ) Terminated by end of path: "?", "#", or end of URI. - """ + """ start = self._index if self.__segment_nz_nc(): while self.__take("/") and self.__segment(): @@ -752,11 +712,11 @@ def __path_rootless(self) -> bool: Method parses the rule: - path-rootless = segment-nz *( "/" segment ) + path-rootless = segment-nz *( "/" segment ) Terminated by end of path: "?", "#", or end of URI. - """ + """ start = self._index if self.__segment_nz(): @@ -775,11 +735,11 @@ def __path_empty(self) -> bool: Method parses the rule: - path-empty = 0 + path-empty = 0 Terminated by end of path: "?", "#", or end of URI. - """ + """ return self.__is_path_end() def __segment(self) -> bool: @@ -787,9 +747,9 @@ def __segment(self) -> bool: Method parses the rule: - segment = *pchar - """ + segment = *pchar + """ while self.__pchar(): pass @@ -800,9 +760,9 @@ def __segment_nz(self) -> bool: Method parses the rule: - segment-nz = 1*pchar - """ + segment-nz = 1*pchar + """ start = self._index if self.__pchar(): @@ -820,10 +780,10 @@ def __segment_nz_nc(self) -> bool: Method parses the rule: - segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) - ; non-zero-length segment without any colon ":" - """ + segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + ; non-zero-length segment without any colon ":" + """ start = self._index while self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take("@"): @@ -841,9 +801,9 @@ def __pchar(self) -> bool: Method parses the rule: - pchar = unreserved / pct-encoded / sub-delims / ":" / "@" - """ + pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + """ return ( self.__unreserved() or self.__pct_encoded() or self.__sub_delims() or self.__take(":") or self.__take("@") ) @@ -853,11 +813,11 @@ def __query(self) -> bool: Method parses the rule: - query = *( pchar / "/" / "?" ) + query = *( pchar / "/" / "?" ) Terminated by "#" or end of URI. - """ + """ start = self._index while True: @@ -876,11 +836,11 @@ def __fragment(self) -> bool: Method parses the rule: - fragment = *( pchar / "/" / "?" ) + fragment = *( pchar / "/" / "?" ) Terminated by end of URI. - """ + """ start = self._index while True: @@ -899,11 +859,11 @@ def __pct_encoded(self) -> bool: Method parses the rule: - pct-encoded = "%" HEXDIG HEXDIG + pct-encoded = "%" HEXDIG HEXDIG Sets `_pct_encoded_found` to true if a valid triplet was found - """ + """ start = self._index if self.__take("%") and self.__hex_dig() and self.__hex_dig(): @@ -919,9 +879,9 @@ def __unreserved(self) -> bool: Method parses the rule: - unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - """ + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + """ return ( self.__alpha() or self.__digit() @@ -936,10 +896,10 @@ def __sub_delims(self) -> bool: Method parses the rule: - sub-delims = "!" / "$" / "&" / "'" / "(" / ")" - / "*" / "+" / "," / ";" / "=" - """ + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" + """ return ( self.__take("!") or self.__take("$") @@ -959,9 +919,9 @@ def __alpha(self) -> bool: Method parses the rule: - ALPHA = %x41-5A / %x61-7A ; A-Z / a-z - """ + ALPHA = %x41-5A / %x61-7A ; A-Z / a-z + """ if self._index >= len(self._string): return False @@ -977,9 +937,9 @@ def __digit(self) -> bool: Method parses the rule: - DIGIT = %x30-39 ; 0-9 - """ + DIGIT = %x30-39 ; 0-9 + """ if self._index >= len(self._string): return False @@ -995,9 +955,9 @@ def __hex_dig(self) -> bool: Method parses the rule: - HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" - """ + HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" + """ if self._index >= len(self._string): return False @@ -1014,12 +974,7 @@ def __take(self, char: str) -> bool: """Take the given char at the current index. If char is at the current index, increment the index. - - Returns: - True if char is at the current index. False if char is not at the - current index or the end of string has been reached. """ - if self._index >= len(self._string): return False