Skip to content

Commit 6b0a459

Browse files
authored
Merge pull request #1294 from maxmind/horgh/normalize
Replace fewer TLDs when normalizing
2 parents fc50f8b + 1b0256b commit 6b0a459

File tree

3 files changed

+65
-12
lines changed

3 files changed

+65
-12
lines changed

CHANGELOG.md

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,8 @@ CHANGELOG
2020
* Duplicate `.com`s are now removed from email domain names when
2121
`hashAddress` is used. For example, `example.com.com` will become
2222
`example.com`.
23-
* Extraneous characters after `.com` are now removed from email domain
24-
names when `hashAddress` is used. For example, `example.comfoo` will
25-
become `example.com`.
26-
* Certain `.com` typos are now normalized to `.com` when `hashAddress` is
27-
used. For example, `example.cam` will become `example.com`.
23+
* Certain TLD typos are now normalized when `hashAddress` is used. For
24+
example, `example.comcom` will become `example.com`.
2825
* Additional `gmail.com` domain names with leading digits are now
2926
normalized when `hashAddress` is used. For example, `100gmail.com` will
3027
become `gmail.com`.

src/request/email.spec.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,19 +180,24 @@ describe('Email()', () => {
180180
},
181181
{
182182
183-
md5: md5('foo@example.com'),
183+
md5: md5('foo@example.comfoo'),
184184
domain: 'example.comfoo',
185185
},
186186
{
187187
188-
md5: md5('foo@example.com'),
188+
md5: md5('foo@example.cam'),
189189
domain: 'example.cam',
190190
},
191191
{
192192
193193
md5: md5('foo@gmail.com'),
194194
domain: '10000gmail.com',
195195
},
196+
{
197+
198+
md5: md5('foo@example.com'),
199+
domain: 'example.comcom',
200+
},
196201
];
197202

198203
test.each(normalizeTests)('%p', (arg) => {

src/request/email.ts

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,54 @@ export default class Email implements EmailProps {
4848
'putlook.com': 'outlook.com',
4949
};
5050

51+
private static readonly typoTLDs: { [key: string]: string } = {
52+
comm: 'com',
53+
commm: 'com',
54+
commmm: 'com',
55+
comn: 'com',
56+
57+
cbm: 'com',
58+
ccm: 'com',
59+
cdm: 'com',
60+
cem: 'com',
61+
cfm: 'com',
62+
cgm: 'com',
63+
chm: 'com',
64+
cim: 'com',
65+
cjm: 'com',
66+
ckm: 'com',
67+
clm: 'com',
68+
cmm: 'com',
69+
cnm: 'com',
70+
cpm: 'com',
71+
cqm: 'com',
72+
crm: 'com',
73+
csm: 'com',
74+
ctm: 'com',
75+
cum: 'com',
76+
cvm: 'com',
77+
cwm: 'com',
78+
cxm: 'com',
79+
cym: 'com',
80+
czm: 'com',
81+
82+
col: 'com',
83+
con: 'com',
84+
85+
dom: 'com',
86+
don: 'com',
87+
som: 'com',
88+
son: 'com',
89+
vom: 'com',
90+
von: 'com',
91+
xom: 'com',
92+
xon: 'com',
93+
94+
clam: 'com',
95+
colm: 'com',
96+
comcom: 'com',
97+
};
98+
5199
private static readonly equivalentDomains: { [key: string]: string } = {
52100
'googlemail.com': 'gmail.com',
53101
'pm.me': 'protonmail.com',
@@ -318,13 +366,16 @@ export default class Email implements EmailProps {
318366
domain = punycode.toASCII(domain);
319367

320368
domain = domain.replace(/(?:\.com){2,}$/, '.com');
321-
domain = domain.replace(/\.com[^.]+$/, '.com');
322-
domain = domain.replace(
323-
/(?:\.(?:com|c[a-z]{1,2}m|co[ln]|[dsvx]o[mn]|))$/,
324-
'.com'
325-
);
326369
domain = domain.replace(/^\d+(?:gmail?\.com)$/, 'gmail.com');
327370

371+
const idx = domain.lastIndexOf('.');
372+
if (idx !== -1) {
373+
const tld = domain.substring(idx + 1);
374+
if (Object.prototype.hasOwnProperty.call(Email.typoTLDs, tld)) {
375+
domain = domain.substring(0, idx) + '.' + Email.typoTLDs[tld];
376+
}
377+
}
378+
328379
if (Object.prototype.hasOwnProperty.call(Email.typoDomains, domain)) {
329380
domain = Email.typoDomains[domain];
330381
}

0 commit comments

Comments
 (0)