Skip to content

Commit fc50f8b

Browse files
authored
Merge pull request #1293 from maxmind/horgh/email-normalize
Add additional email normalization
2 parents 1348bb4 + 0032235 commit fc50f8b

File tree

3 files changed

+308
-7
lines changed

3 files changed

+308
-7
lines changed

CHANGELOG.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,31 @@ CHANGELOG
55
------------------
66

77
* Added `PxpFinancial` and `Trustpay` to the `Processor` enum.
8+
* Equivalent domain names are now normalized when `hashAddress` is used.
9+
For example, `googlemail.com` will become `gmail.com`.
10+
* Periods are now removed from `gmail.com` email address local parts when
11+
`hashAddress` is used. For example, `f.o.o@gmail.com` will become
12+
`foo@gmail.com`.
13+
* Fastmail alias subdomain email addresses are now normalized when
14+
`hashAddress` is used. For example, `alias@user.fastmail.com` will become
15+
`user@fastmail.com`.
16+
* Additional `yahoo.com` email addresses now have aliases removed from
17+
their local part when `hashAddress` is used. For example,
18+
`foo-bar@yahoo.com` will become `foo@yahoo.com` for additional
19+
`yahoo.com` domains.
20+
* Duplicate `.com`s are now removed from email domain names when
21+
`hashAddress` is used. For example, `example.com.com` will become
22+
`example.com`.
23+
* Extraneous characters after `.com` are now removed from email domain
24+
names when `hashAddress` is used. For example, `example.comfoo` will
25+
become `example.com`.
26+
* Certain `.com` typos are now normalized to `.com` when `hashAddress` is
27+
used. For example, `example.cam` will become `example.com`.
28+
* Additional `gmail.com` domain names with leading digits are now
29+
normalized when `hashAddress` is used. For example, `100gmail.com` will
30+
become `gmail.com`.
31+
* Additional `gmail.com` typos are now normalized when `hashAddress` is
32+
used. For example, `gmali.com` will become `gmail.com`.
833

934
6.0.0 (2023-12-05)
1035
------------------

src/request/email.spec.ts

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { ArgumentError } from '../errors';
2+
import crypto from 'crypto';
23
import Email from './email';
34

45
describe('Email()', () => {
@@ -66,6 +67,10 @@ describe('Email()', () => {
6667
expect(email.domain).toBe('bar.com');
6768
});
6869

70+
const md5 = (s: string): string => {
71+
return crypto.createHash('md5').update(s).digest('hex');
72+
};
73+
6974
const normalizeTests = [
7075
{
7176
@@ -143,6 +148,51 @@ describe('Email()', () => {
143148
// 'test' is rejected as invalid.
144149
// 'test@' is rejected as invalid.
145150
// 'test@.' is rejected as invalid.
151+
{
152+
email: 'foo@googlemail.com',
153+
md5: md5('foo@gmail.com'),
154+
domain: 'googlemail.com',
155+
},
156+
{
157+
email: 'foo.bar@gmail.com',
158+
md5: md5('foobar@gmail.com'),
159+
domain: 'gmail.com',
160+
},
161+
{
162+
email: 'alias@user.fastmail.com',
163+
md5: md5('user@fastmail.com'),
164+
domain: 'user.fastmail.com',
165+
},
166+
{
167+
email: 'foo@bar.example.com',
168+
md5: md5('foo@bar.example.com'),
169+
domain: 'bar.example.com',
170+
},
171+
{
172+
email: 'foo-bar@ymail.com',
173+
md5: md5('foo@ymail.com'),
174+
domain: 'ymail.com',
175+
},
176+
{
177+
email: 'foo@example.com.com',
178+
md5: md5('foo@example.com'),
179+
domain: 'example.com.com',
180+
},
181+
{
182+
email: 'foo@example.comfoo',
183+
md5: md5('foo@example.com'),
184+
domain: 'example.comfoo',
185+
},
186+
{
187+
email: 'foo@example.cam',
188+
md5: md5('foo@example.com'),
189+
domain: 'example.cam',
190+
},
191+
{
192+
email: 'foo@10000gmail.com',
193+
md5: md5('foo@gmail.com'),
194+
domain: '10000gmail.com',
195+
},
146196
];
147197

148198
test.each(normalizeTests)('%p', (arg) => {

src/request/email.ts

Lines changed: 233 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,17 +34,207 @@ export default class Email implements EmailProps {
3434

3535
private static readonly typoDomains: { [key: string]: string } = {
3636
// gmail.com
37-
'35gmai.com': 'gmail.com',
38-
'636gmail.com': 'gmail.com',
37+
'gmai.com': 'gmail.com',
3938
'gamil.com': 'gmail.com',
40-
'gmail.comu': 'gmail.com',
39+
'gmali.com': 'gmail.com',
4140
'gmial.com': 'gmail.com',
4241
'gmil.com': 'gmail.com',
42+
'gmaill.com': 'gmail.com',
43+
'gmailm.com': 'gmail.com',
44+
'gmailo.com': 'gmail.com',
45+
'gmailyhoo.com': 'gmail.com',
4346
'yahoogmail.com': 'gmail.com',
4447
// outlook.com
4548
'putlook.com': 'outlook.com',
4649
};
4750

51+
private static readonly equivalentDomains: { [key: string]: string } = {
52+
'googlemail.com': 'gmail.com',
53+
'pm.me': 'protonmail.com',
54+
'proton.me': 'protonmail.com',
55+
'yandex.by': 'yandex.ru',
56+
'yandex.com': 'yandex.ru',
57+
'yandex.kz': 'yandex.ru',
58+
'yandex.ua': 'yandex.ru',
59+
'ya.ru': 'yandex.ru',
60+
};
61+
62+
private static readonly fastmailDomains: { [key: string]: boolean } = {
63+
'123mail.org': true,
64+
'150mail.com': true,
65+
'150ml.com': true,
66+
'16mail.com': true,
67+
'2-mail.com': true,
68+
'4email.net': true,
69+
'50mail.com': true,
70+
'airpost.net': true,
71+
'allmail.net': true,
72+
'bestmail.us': true,
73+
'cluemail.com': true,
74+
'elitemail.org': true,
75+
'emailcorner.net': true,
76+
'emailengine.net': true,
77+
'emailengine.org': true,
78+
'emailgroups.net': true,
79+
'emailplus.org': true,
80+
'emailuser.net': true,
81+
'eml.cc': true,
82+
'f-m.fm': true,
83+
'fast-email.com': true,
84+
'fast-mail.org': true,
85+
'fastem.com': true,
86+
'fastemail.us': true,
87+
'fastemailer.com': true,
88+
'fastest.cc': true,
89+
'fastimap.com': true,
90+
'fastmail.cn': true,
91+
'fastmail.co.uk': true,
92+
'fastmail.com': true,
93+
'fastmail.com.au': true,
94+
'fastmail.de': true,
95+
'fastmail.es': true,
96+
'fastmail.fm': true,
97+
'fastmail.fr': true,
98+
'fastmail.im': true,
99+
'fastmail.in': true,
100+
'fastmail.jp': true,
101+
'fastmail.mx': true,
102+
'fastmail.net': true,
103+
'fastmail.nl': true,
104+
'fastmail.org': true,
105+
'fastmail.se': true,
106+
'fastmail.to': true,
107+
'fastmail.tw': true,
108+
'fastmail.uk': true,
109+
'fastmail.us': true,
110+
'fastmailbox.net': true,
111+
'fastmessaging.com': true,
112+
'fea.st': true,
113+
'fmail.co.uk': true,
114+
'fmailbox.com': true,
115+
'fmgirl.com': true,
116+
'fmguy.com': true,
117+
'ftml.net': true,
118+
'h-mail.us': true,
119+
'hailmail.net': true,
120+
'imap-mail.com': true,
121+
'imap.cc': true,
122+
'imapmail.org': true,
123+
'inoutbox.com': true,
124+
'internet-e-mail.com': true,
125+
'internet-mail.org': true,
126+
'internetemails.net': true,
127+
'internetmailing.net': true,
128+
'jetemail.net': true,
129+
'justemail.net': true,
130+
'letterboxes.org': true,
131+
'mail-central.com': true,
132+
'mail-page.com': true,
133+
'mailandftp.com': true,
134+
'mailas.com': true,
135+
'mailbolt.com': true,
136+
'mailc.net': true,
137+
'mailcan.com': true,
138+
'mailforce.net': true,
139+
'mailftp.com': true,
140+
'mailhaven.com': true,
141+
'mailingaddress.org': true,
142+
'mailite.com': true,
143+
'mailmight.com': true,
144+
'mailnew.com': true,
145+
'mailsent.net': true,
146+
'mailservice.ms': true,
147+
'mailup.net': true,
148+
'mailworks.org': true,
149+
'ml1.net': true,
150+
'mm.st': true,
151+
'myfastmail.com': true,
152+
'mymacmail.com': true,
153+
'nospammail.net': true,
154+
'ownmail.net': true,
155+
'petml.com': true,
156+
'postinbox.com': true,
157+
'postpro.net': true,
158+
'proinbox.com': true,
159+
'promessage.com': true,
160+
'realemail.net': true,
161+
'reallyfast.biz': true,
162+
'reallyfast.info': true,
163+
'rushpost.com': true,
164+
'sent.as': true,
165+
'sent.at': true,
166+
'sent.com': true,
167+
'speedpost.net': true,
168+
'speedymail.org': true,
169+
'ssl-mail.com': true,
170+
'swift-mail.com': true,
171+
'the-fastest.net': true,
172+
'the-quickest.com': true,
173+
'theinternetemail.com': true,
174+
'veryfast.biz': true,
175+
'veryspeedy.net': true,
176+
'warpmail.net': true,
177+
'xsmail.com': true,
178+
'yepmail.net': true,
179+
'your-mail.com': true,
180+
};
181+
182+
private static readonly yahooDomains: { [key: string]: boolean } = {
183+
'y7mail.com': true,
184+
'yahoo.at': true,
185+
'yahoo.be': true,
186+
'yahoo.bg': true,
187+
'yahoo.ca': true,
188+
'yahoo.cl': true,
189+
'yahoo.co.id': true,
190+
'yahoo.co.il': true,
191+
'yahoo.co.in': true,
192+
'yahoo.co.kr': true,
193+
'yahoo.co.nz': true,
194+
'yahoo.co.th': true,
195+
'yahoo.co.uk': true,
196+
'yahoo.co.za': true,
197+
'yahoo.com': true,
198+
'yahoo.com.ar': true,
199+
'yahoo.com.au': true,
200+
'yahoo.com.br': true,
201+
'yahoo.com.co': true,
202+
'yahoo.com.hk': true,
203+
'yahoo.com.hr': true,
204+
'yahoo.com.mx': true,
205+
'yahoo.com.my': true,
206+
'yahoo.com.pe': true,
207+
'yahoo.com.ph': true,
208+
'yahoo.com.sg': true,
209+
'yahoo.com.tr': true,
210+
'yahoo.com.tw': true,
211+
'yahoo.com.ua': true,
212+
'yahoo.com.ve': true,
213+
'yahoo.com.vn': true,
214+
'yahoo.cz': true,
215+
'yahoo.de': true,
216+
'yahoo.dk': true,
217+
'yahoo.ee': true,
218+
'yahoo.es': true,
219+
'yahoo.fi': true,
220+
'yahoo.fr': true,
221+
'yahoo.gr': true,
222+
'yahoo.hu': true,
223+
'yahoo.ie': true,
224+
'yahoo.in': true,
225+
'yahoo.it': true,
226+
'yahoo.lt': true,
227+
'yahoo.lv': true,
228+
'yahoo.nl': true,
229+
'yahoo.no': true,
230+
'yahoo.pl': true,
231+
'yahoo.pt': true,
232+
'yahoo.ro': true,
233+
'yahoo.se': true,
234+
'yahoo.sk': true,
235+
'ymail.com': true,
236+
};
237+
48238
public constructor(email: EmailProps) {
49239
if (email.address != null && !isEmail(email.address)) {
50240
throw new ArgumentError('`email.address` is an invalid email address');
@@ -85,28 +275,64 @@ export default class Email implements EmailProps {
85275

86276
domain = this.cleanDomain(domain);
87277

88-
const separator = domain === 'yahoo.com' ? '-' : '+';
278+
let separator = '+';
279+
if (Object.prototype.hasOwnProperty.call(Email.yahooDomains, domain)) {
280+
separator = '-';
281+
}
282+
89283
const separatorIndex = localPart.indexOf(separator);
90284
if (separatorIndex > 0) {
91285
localPart = localPart.substring(0, separatorIndex);
92286
}
93287

288+
if (domain === 'gmail.com') {
289+
localPart = localPart.replace(/\./g, '');
290+
}
291+
292+
const domainParts = domain.split('.');
293+
if (domainParts.length > 2) {
294+
const possibleDomain = domainParts.slice(1).join('.');
295+
if (
296+
Object.prototype.hasOwnProperty.call(
297+
Email.fastmailDomains,
298+
possibleDomain
299+
)
300+
) {
301+
domain = possibleDomain;
302+
if (localPart !== '') {
303+
localPart = domainParts[0];
304+
}
305+
}
306+
}
307+
94308
return localPart + '@' + domain;
95309
}
96310

97311
private cleanDomain(domain: string) {
98312
// We don't need to trim the domain as if it has any leading whitespace
99-
// validation rejects it as invalid.
313+
// validation (isEmail()) rejects it as invalid.
100314

101-
// We don't need to strip a trailing '.' because validation rejects domains
102-
// that have it.
315+
// We don't need to strip a trailing '.' because validation (isEmail())
316+
// rejects domains that have it.
103317

104318
domain = punycode.toASCII(domain);
105319

320+
domain = domain.replace(/(?:\.com){2,}$/, '.com');
321+
domain = domain.replace(/\.com[^.]+$/, '.com');
322+
domain = domain.replace(
323+
/(?:\.(?:com|c[a-z]{1,2}m|co[ln]|[dsvx]o[mn]|))$/,
324+
'.com'
325+
);
326+
domain = domain.replace(/^\d+(?:gmail?\.com)$/, 'gmail.com');
327+
106328
if (Object.prototype.hasOwnProperty.call(Email.typoDomains, domain)) {
107329
domain = Email.typoDomains[domain];
108330
}
109331

332+
if (Object.prototype.hasOwnProperty.call(Email.equivalentDomains, domain)) {
333+
domain = Email.equivalentDomains[domain];
334+
}
335+
110336
return domain;
111337
}
112338
}

0 commit comments

Comments
 (0)