Skip to content

Commit 52cf9a2

Browse files
authored
Enhance isEmail validation (#269)
This enhances the current `isEmail` validation by using a consistent regex that will be used across protovalidate implementations.
1 parent f9aea7e commit 52cf9a2

File tree

2 files changed

+16
-59
lines changed

2 files changed

+16
-59
lines changed

protovalidate/internal/extra_func.py

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
# limitations under the License.
1414

1515
import math
16+
import re
1617
import typing
17-
from email.utils import parseaddr
1818
from ipaddress import IPv4Address, IPv4Network, IPv6Address, IPv6Network, ip_address, ip_network
1919
from urllib import parse as urlparse
2020

@@ -23,6 +23,11 @@
2323

2424
from protovalidate.internal import string_format
2525

26+
# See https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
27+
_email_regex = re.compile(
28+
r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
29+
)
30+
2631

2732
def _validate_hostname(host):
2833
if not host:
@@ -49,23 +54,6 @@ def _validate_hostname(host):
4954
return not all_digits
5055

5156

52-
def validate_email(addr):
53-
parts = parseaddr(addr)
54-
if addr != parts[1]:
55-
return False
56-
57-
addr = parts[1]
58-
if len(addr) > 254:
59-
return False
60-
61-
parts = addr.split("@")
62-
if len(parts) != 2:
63-
return False
64-
if len(parts[0]) > 64:
65-
return False
66-
return _validate_hostname(parts[1])
67-
68-
6957
def validate_host_and_port(string: str, *, port_required: bool) -> bool:
7058
if not string:
7159
return False
@@ -157,10 +145,19 @@ def is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result:
157145

158146

159147
def is_email(string: celtypes.Value) -> celpy.Result:
148+
"""Returns true if the string is an email address, for example "foo@example.com".
149+
150+
Conforms to the definition for a valid email address from the HTML standard.
151+
Note that this standard willfully deviates from RFC 5322, which allows many
152+
unexpected forms of email addresses and will easily match a typographical
153+
error.
154+
"""
155+
160156
if not isinstance(string, celtypes.StringType):
161157
msg = "invalid argument, expected string"
162158
raise celpy.CELEvalError(msg)
163-
return celtypes.BoolType(validate_email(string))
159+
m = _email_regex.match(string) is not None
160+
return celtypes.BoolType(m)
164161

165162

166163
def is_uri(string: celtypes.Value) -> celpy.Result:

tests/conformance/nonconforming.yaml

Lines changed: 0 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,46 +8,6 @@ standard_constraints/well_known_types/timestamp:
88
- gte_lte/invalid/above
99
- lte/invalid
1010

11-
library/is_email:
12-
- invalid/left_side_empty
13-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsEmail]:{val:"@example.com"}
14-
# want: validation error (1 violation)
15-
# 1. constraint_id: "library.is_email"
16-
# got: valid
17-
- invalid/non_ascii
18-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsEmail]:{val:"µ@example.com"}
19-
# want: validation error (1 violation)
20-
# 1. constraint_id: "library.is_email"
21-
# got: valid
22-
- invalid/quoted-string/a
23-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsEmail]:{val:"\"foo bar\"@example.com"}
24-
# want: validation error (1 violation)
25-
# 1. constraint_id: "library.is_email"
26-
# got: valid
27-
- invalid/quoted-string/b
28-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsEmail]:{val:"\"foo..bar\"@example.com"}
29-
# want: validation error (1 violation)
30-
# 1. constraint_id: "library.is_email"
31-
# got: valid
32-
- invalid/trailing_dot
33-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsEmail]:{val:"foo@example.com."}
34-
# want: validation error (1 violation)
35-
# 1. constraint_id: "library.is_email"
36-
# got: valid
37-
- valid/exhaust_atext
38-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsEmail]:{val:"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!#$%&'*+-/=?^_`{|}~@example.com"}
39-
# want: valid
40-
# got: validation error (1 violation)
41-
# 1. constraint_id: "library.is_email"
42-
# message: ""
43-
# for_key: false
44-
- valid/label_all_digits
45-
# input: [type.googleapis.com/buf.validate.conformance.cases.IsEmail]:{val:"[email protected]"}
46-
# want: valid
47-
# got: validation error (1 violation)
48-
# 1. constraint_id: "library.is_email"
49-
# message: ""
50-
# for_key: false
5111
library/is_host_and_port:
5212
- port_required/false/invalid/port_number_sign
5313
# input: [type.googleapis.com/buf.validate.conformance.cases.IsHostAndPort]:{val:"example.com:+0"}

0 commit comments

Comments
 (0)