Skip to content

Commit c2b1e1c

Browse files
Merge pull request #132 from davishmcclurg/email
Better email validation
2 parents 8ac15d8 + bbb49a4 commit c2b1e1c

File tree

5 files changed

+84
-9
lines changed

5 files changed

+84
-9
lines changed

lib/json_schemer.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
require 'json_schemer/version'
1717
require 'json_schemer/format/hostname'
1818
require 'json_schemer/format/uri_template'
19+
require 'json_schemer/format/email'
1920
require 'json_schemer/format'
2021
require 'json_schemer/errors'
2122
require 'json_schemer/cached_resolver'

lib/json_schemer/format.rb

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
# frozen_string_literal: true
22
module JSONSchemer
33
module Format
4+
include Email
45
include Hostname
56
include URITemplate
67

7-
# this is no good
8-
EMAIL_REGEX = /\A[^@\s]+@([\p{L}\d-]+\.)+[\p{L}\d\-]{2,}\z/i.freeze
98
JSON_POINTER_REGEX_STRING = '(\/([^~\/]|~[01])*)*'
109
JSON_POINTER_REGEX = /\A#{JSON_POINTER_REGEX_STRING}\z/.freeze
1110
RELATIVE_JSON_POINTER_REGEX = /\A(0|[1-9]\d*)(#|#{JSON_POINTER_REGEX_STRING})?\z/.freeze
@@ -72,12 +71,6 @@ def valid_date_time?(data)
7271
false
7372
end
7473

75-
def valid_email?(data)
76-
return false unless EMAIL_REGEX.match?(data)
77-
local, _domain = data.partition('@')
78-
!local.start_with?('.') && !local.end_with?('.') && !local.include?('..')
79-
end
80-
8174
def valid_ip?(data, family)
8275
IPAddr.new(data, family)
8376
IP_REGEX.match?(data)

lib/json_schemer/format/email.rb

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# frozen_string_literal: true
2+
module JSONSchemer
3+
module Format
4+
module Email
5+
# https://datatracker.ietf.org/doc/html/rfc6531#section-3.3
6+
# I think this is the same as "UTF8-non-ascii"? (https://datatracker.ietf.org/doc/html/rfc6532#section-3.1)
7+
UTF8_NON_ASCII = '[^[:ascii:]]'
8+
# https://datatracker.ietf.org/doc/html/rfc5321#section-4.1.2
9+
A_TEXT = "([\\w!#$%&'*+\\-/=?\\^`{|}~]|#{UTF8_NON_ASCII})" # atext = ALPHA / DIGIT / ; Printable US-ASCII
10+
# "!" / "#" / ; characters not including
11+
# "$" / "%" / ; specials. Used for atoms.
12+
# "&" / "'" /
13+
# "*" / "+" /
14+
# "-" / "/" /
15+
# "=" / "?" /
16+
# "^" / "_" /
17+
# "`" / "{" /
18+
# "|" / "}" /
19+
# "~"
20+
Q_TEXT_SMTP = "([\\x20-\\x21\\x23-\\x5B\\x5D-\\x7E]|#{UTF8_NON_ASCII})" # qtextSMTP = %d32-33 / %d35-91 / %d93-126
21+
# ; i.e., within a quoted string, any
22+
# ; ASCII graphic or space is permitted
23+
# ; without blackslash-quoting except
24+
# ; double-quote and the backslash itself.
25+
QUOTED_PAIR_SMTP = '\x5C[\x20-\x7E]' # quoted-pairSMTP = %d92 %d32-126
26+
# ; i.e., backslash followed by any ASCII
27+
# ; graphic (including itself) or SPace
28+
Q_CONTENT_SMTP = "#{Q_TEXT_SMTP}|#{QUOTED_PAIR_SMTP}" # QcontentSMTP = qtextSMTP / quoted-pairSMTP
29+
QUOTED_STRING = "\"(#{Q_CONTENT_SMTP})*\"" # Quoted-string = DQUOTE *QcontentSMTP DQUOTE
30+
ATOM = "#{A_TEXT}+" # Atom = 1*atext
31+
DOT_STRING = "#{ATOM}(\\.#{ATOM})*" # Dot-string = Atom *("." Atom)
32+
LOCAL_PART = "#{DOT_STRING}|#{QUOTED_STRING}" # Local-part = Dot-string / Quoted-string
33+
# ; MAY be case-sensitive
34+
# IPv4-address-literal = Snum 3("." Snum)
35+
# using `valid_ip?` to check ip addresses because it's complicated. # IPv6-address-literal = "IPv6:" IPv6-addr
36+
ADDRESS_LITERAL = '\[(IPv6:(?<ipv6>[\h:]+)|(?<ipv4>[\d.]+))\]' # address-literal = "[" ( IPv4-address-literal /
37+
# IPv6-address-literal /
38+
# General-address-literal ) "]"
39+
# ; See Section 4.1.3
40+
# using `valid_hostname?` to check domain because it's complicated
41+
MAILBOX = "(#{LOCAL_PART})@(#{ADDRESS_LITERAL}|(?<domain>.+))" # Mailbox = Local-part "@" ( Domain / address-literal )
42+
EMAIL_REGEX = /\A#{MAILBOX}\z/
43+
44+
def valid_email?(data)
45+
return false unless match = EMAIL_REGEX.match(data)
46+
if ipv4 = match.named_captures.fetch('ipv4')
47+
valid_ip?(ipv4, Socket::AF_INET)
48+
elsif ipv6 = match.named_captures.fetch('ipv6')
49+
valid_ip?(ipv6, Socket::AF_INET6)
50+
else
51+
valid_hostname?(match.named_captures.fetch('domain'))
52+
end
53+
end
54+
end
55+
end
56+
end

test/format_test.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,29 @@ def test_it_allows_callable_custom_format
7373
assert(schema.valid?('valid'))
7474
refute(schema.valid?('invalid'))
7575
end
76+
77+
def test_email_format
78+
schema = JSONSchemer.schema({ 'format' => 'email' })
79+
80+
{
81+
"[email protected]" => true,
82+
"2962" => false,
83+
"[email protected]" => true,
84+
"[email protected]" => true,
85+
"[email protected]" => true,
86+
"\"joe bloggs\"@example.com" => true,
87+
"\"joe..bloggs\"@example.com" => true,
88+
"\"joe@bloggs\"@example.com" => true,
89+
"joe.bloggs@[127.0.0.1]" => true,
90+
"joe.bloggs@[IPv6:::1]" => true,
91+
"[email protected]" => false,
92+
"[email protected]" => false,
93+
"[email protected]" => true,
94+
"[email protected]" => false,
95+
"joe.bloggs@invalid=domain.com" => false,
96+
"joe.bloggs@[127.0.0.300]" => false
97+
}.each do |email, valid|
98+
assert_equal(valid, schema.valid?(email))
99+
end
100+
end
76101
end

test/uri_template_test.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
require 'test_helper'
22

3-
class PrettyErrorsTest < Minitest::Test
3+
class UriTemplateTest < Minitest::Test
44
def test_uri_template_format
55
schema = JSONSchemer.schema({ 'type' => 'string', 'format' => 'uri-template' })
66

0 commit comments

Comments
 (0)