Skip to content

Commit 466935e

Browse files
authored
Unicode support in EmailField (#1527)
1 parent b52d3e3 commit 466935e

File tree

3 files changed

+182
-24
lines changed

3 files changed

+182
-24
lines changed

mongoengine/fields.py

Lines changed: 94 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import decimal
33
import itertools
44
import re
5+
import socket
56
import time
67
import uuid
78
import warnings
@@ -154,21 +155,105 @@ class EmailField(StringField):
154155
155156
.. versionadded:: 0.4
156157
"""
158+
USER_REGEX = re.compile(
159+
# `dot-atom` defined in RFC 5322 Section 3.2.3.
160+
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*\Z"
161+
# `quoted-string` defined in RFC 5322 Section 3.2.4.
162+
r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)',
163+
re.IGNORECASE
164+
)
165+
166+
UTF8_USER_REGEX = re.compile(
167+
six.u(
168+
# RFC 6531 Section 3.3 extends `atext` (used by dot-atom) to
169+
# include `UTF8-non-ascii`.
170+
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z\u0080-\U0010FFFF]+)*\Z"
171+
# `quoted-string`
172+
r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"\Z)'
173+
), re.IGNORECASE | re.UNICODE
174+
)
157175

158-
EMAIL_REGEX = re.compile(
159-
# dot-atom
160-
r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*"
161-
# quoted-string
162-
r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-011\013\014\016-\177])*"'
163-
# domain (max length of an ICANN TLD is 22 characters)
164-
r')@(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}|[A-Z0-9-]{2,}(?<!-))$', re.IGNORECASE
176+
DOMAIN_REGEX = re.compile(
177+
r'((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+)(?:[A-Z0-9-]{2,63}(?<!-))\Z',
178+
re.IGNORECASE
165179
)
166180

181+
error_msg = u'Invalid email address: %s'
182+
183+
def __init__(self, domain_whitelist=None, allow_utf8_user=False,
             allow_ip_domain=False, *args, **kwargs):
    """Set up the email-specific validation options.

    Args:
        domain_whitelist (list) - domain names that should be accepted
            even though they would otherwise be considered invalid.
        allow_utf8_user (bool) - whether the user part of the email
            address may contain UTF8 characters. False by default.
        allow_ip_domain (bool) - whether the domain part of the email
            may be a valid IPv4 or IPv6 address. False by default.

    Any remaining positional and keyword arguments are passed on to the
    parent class initializer.
    """
    self.allow_ip_domain = allow_ip_domain
    self.allow_utf8_user = allow_utf8_user
    # Fall back to a fresh empty list when no whitelist is supplied.
    self.domain_whitelist = domain_whitelist if domain_whitelist else []
    super(EmailField, self).__init__(*args, **kwargs)
200+
201+
def validate_user_part(self, user_part):
    """Validate the user part of the email address.

    The value is matched against ``UTF8_USER_REGEX`` when
    ``allow_utf8_user`` is set and against ``USER_REGEX`` otherwise.

    Return True if valid and False otherwise.
    """
    if self.allow_utf8_user:
        pattern = self.UTF8_USER_REGEX
    else:
        pattern = self.USER_REGEX

    # ``match`` yields a match object or None; convert the result so the
    # method really returns the boolean its contract promises.
    return pattern.match(user_part) is not None
208+
209+
def validate_domain_part(self, domain_part):
    """Validate the domain part of the email address.

    A domain is accepted when it is listed in ``domain_whitelist``, when
    it matches ``DOMAIN_REGEX`` or, if ``allow_ip_domain`` is set, when it
    is a bracketed IPv4/IPv6 address such as ``[192.168.0.1]``.

    Return True if valid and False otherwise.
    """
    # Skip domain validation if it's in the whitelist.
    if domain_part in self.domain_whitelist:
        return True

    if self.DOMAIN_REGEX.match(domain_part):
        return True

    # Validate IPv4/IPv6, e.g. user@[192.168.0.1]. startswith/endswith are
    # used instead of indexing so that an empty domain (as in "user@") is
    # rejected instead of raising IndexError.
    if (
        self.allow_ip_domain and
        domain_part.startswith('[') and
        domain_part.endswith(']')
    ):
        address = domain_part[1:-1]
        for addr_family in (socket.AF_INET, socket.AF_INET6):
            try:
                socket.inet_pton(addr_family, address)
            except (socket.error, UnicodeEncodeError):
                # Not a valid address for this family; try the next one.
                continue
            return True

    return False
234+
167235
def validate(self, value):
168-
if not EmailField.EMAIL_REGEX.match(value):
169-
self.error('Invalid email address: %s' % value)
170236
super(EmailField, self).validate(value)
171237

238+
if '@' not in value:
239+
self.error(self.error_msg % value)
240+
241+
user_part, domain_part = value.rsplit('@', 1)
242+
243+
# Validate the user part.
244+
if not self.validate_user_part(user_part):
245+
self.error(self.error_msg % value)
246+
247+
# Validate the domain and, if invalid, see if it's IDN-encoded.
248+
if not self.validate_domain_part(domain_part):
249+
try:
250+
domain_part = domain_part.encode('idna').decode('ascii')
251+
except UnicodeError:
252+
self.error(self.error_msg % value)
253+
else:
254+
if not self.validate_domain_part(domain_part):
255+
self.error(self.error_msg % value)
256+
172257

173258
class IntField(BaseField):
174259
"""32-bit integer field."""

tests/document/instance.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -844,7 +844,7 @@ def test_save(self):
844844
class Recipient(Document):
845845
email = EmailField(required=True)
846846

847-
recipient = Recipient(email='root@localhost')
847+
recipient = Recipient(email='not-an-email')
848848
self.assertRaises(ValidationError, recipient.save)
849849
recipient.save(validate=False)
850850

tests/fields/fields.py

Lines changed: 87 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import itertools
77
import re
88
import pymongo
9+
import sys
910

1011
from nose.plugins.skip import SkipTest
1112
from collections import OrderedDict
@@ -342,8 +343,6 @@ def test_url_validation(self):
342343
class Link(Document):
343344
url = URLField()
344345

345-
Link.drop_collection()
346-
347346
link = Link()
348347
link.url = 'google'
349348
self.assertRaises(ValidationError, link.validate)
@@ -356,8 +355,6 @@ def test_unicode_url_validation(self):
356355
class Link(Document):
357356
url = URLField()
358357

359-
Link.drop_collection()
360-
361358
link = Link()
362359
link.url = u'http://привет.com'
363360

@@ -3456,23 +3453,99 @@ def test_email_field(self):
34563453
class User(Document):
34573454
email = EmailField()
34583455

3459-
user = User(email="[email protected]")
3460-
self.assertTrue(user.validate() is None)
3456+
user = User(email='[email protected]')
3457+
user.validate()
34613458

3462-
user = User(email="[email protected]")
3463-
self.assertTrue(user.validate() is None)
3459+
user = User(email='[email protected]')
3460+
user.validate()
34643461

3465-
user = User(email=("Kofq@rhom0e4klgauOhpbpNdogawnyIKvQS0wk2mjqrgGQ5S"
3466-
"aJIazqqWkm7.net"))
3467-
self.assertTrue(user.validate() is None)
3462+
user = User(email=('Kofq@rhom0e4klgauOhpbpNdogawnyIKvQS0wk2mjqrgGQ5S'
3463+
'aJIazqqWkm7.net'))
3464+
user.validate()
34683465

3469-
user = User(email="[email protected]")
3470-
self.assertTrue(user.validate() is None)
3466+
user = User(email='[email protected]')
3467+
user.validate()
3468+
3469+
user = User(email='[email protected].')
3470+
self.assertRaises(ValidationError, user.validate)
3471+
3472+
# unicode domain
3473+
user = User(email=u'user@пример.рф')
3474+
user.validate()
3475+
3476+
# invalid unicode domain
3477+
user = User(email=u'user@пример')
3478+
self.assertRaises(ValidationError, user.validate)
3479+
3480+
# invalid data type
3481+
user = User(email=123)
3482+
self.assertRaises(ValidationError, user.validate)
3483+
3484+
def test_email_field_unicode_user(self):
3485+
# Don't run this test on PyPy3 (approximated below as any Python 3.2 interpreter), which doesn't support unicode regex:
3486+
# https://bitbucket.org/pypy/pypy/issues/1821/regular-expression-doesnt-find-unicode
3487+
if sys.version_info[:2] == (3, 2):
3488+
raise SkipTest('unicode email addresses are not supported on PyPy 3')
3489+
3490+
class User(Document):
3491+
email = EmailField()
3492+
3493+
# unicode user shouldn't validate by default...
3494+
user = User(email=u'Dörte@Sörensen.example.com')
3495+
self.assertRaises(ValidationError, user.validate)
3496+
3497+
# ...but it should be fine with allow_utf8_user set to True
3498+
class User(Document):
3499+
email = EmailField(allow_utf8_user=True)
34713500

3501+
user = User(email=u'Dörte@Sörensen.example.com')
3502+
user.validate()
3503+
3504+
def test_email_field_domain_whitelist(self):
3505+
class User(Document):
3506+
email = EmailField()
3507+
3508+
# localhost domain shouldn't validate by default...
34723509
user = User(email='me@localhost')
34733510
self.assertRaises(ValidationError, user.validate)
34743511

3475-
user = User(email="[email protected].")
3512+
# ...but it should be fine if it's whitelisted
3513+
class User(Document):
3514+
email = EmailField(domain_whitelist=['localhost'])
3515+
3516+
user = User(email='me@localhost')
3517+
user.validate()
3518+
3519+
def test_email_field_ip_domain(self):
3520+
class User(Document):
3521+
email = EmailField()
3522+
3523+
valid_ipv4 = 'email@[127.0.0.1]'
3524+
valid_ipv6 = 'email@[2001:dB8::1]'
3525+
invalid_ip = 'email@[324.0.0.1]'
3526+
3527+
# IP address as a domain shouldn't validate by default...
3528+
user = User(email=valid_ipv4)
3529+
self.assertRaises(ValidationError, user.validate)
3530+
3531+
user = User(email=valid_ipv6)
3532+
self.assertRaises(ValidationError, user.validate)
3533+
3534+
user = User(email=invalid_ip)
3535+
self.assertRaises(ValidationError, user.validate)
3536+
3537+
# ...but it should be fine with allow_ip_domain set to True
3538+
class User(Document):
3539+
email = EmailField(allow_ip_domain=True)
3540+
3541+
user = User(email=valid_ipv4)
3542+
user.validate()
3543+
3544+
user = User(email=valid_ipv6)
3545+
user.validate()
3546+
3547+
# invalid IP should still fail validation
3548+
user = User(email=invalid_ip)
34763549
self.assertRaises(ValidationError, user.validate)
34773550

34783551
def test_email_field_honors_regex(self):

0 commit comments

Comments
 (0)