@@ -90,29 +90,237 @@ def clean_email_address(address)
9090
9191 domain = clean_domain ( domain )
9292
93- if domain == 'yahoo.com'
93+ if YAHOO_DOMAINS . key? ( domain )
9494 local_part . sub! ( /\A ([^-]+)-.*\z / , '\1' )
9595 else
9696 local_part . sub! ( /\A ([^+]+)\+ .*\z / , '\1' )
9797 end
9898
99+ if domain == 'gmail.com'
100+ local_part . gsub! ( '.' , '' )
101+ end
102+
103+ domain_parts = domain . split ( '.' )
104+ if domain_parts . length > 2
105+ possible_domain = domain_parts [ 1 ..] . join ( '.' )
106+ if FASTMAIL_DOMAINS . key? ( possible_domain )
107+ domain = possible_domain
108+ if local_part != ''
109+ local_part = domain_parts [ 0 ]
110+ end
111+ end
112+ end
113+
99114 "#{ local_part } @#{ domain } "
100115 end
101116
102117 TYPO_DOMAINS = {
103118 # gmail.com
104- '35gmai.com' => 'gmail.com' ,
105- '636gmail.com' => 'gmail.com' ,
119+ 'gmai.com' => 'gmail.com' ,
106120 'gamil.com' => 'gmail.com' ,
107- 'gmail.comu' => 'gmail.com' ,
121+ 'gmali.com' => 'gmail.com' ,
108122 'gmial.com' => 'gmail.com' ,
109123 'gmil.com' => 'gmail.com' ,
124+ 'gmaill.com' => 'gmail.com' ,
125+ 'gmailm.com' => 'gmail.com' ,
126+ 'gmailo.com' => 'gmail.com' ,
127+ 'gmailyhoo.com' => 'gmail.com' ,
110128 'yahoogmail.com' => 'gmail.com' ,
111129 # outlook.com
112130 'putlook.com' => 'outlook.com' ,
113131 } . freeze
114132 private_constant :TYPO_DOMAINS
115133
134+ EQUIVALENT_DOMAINS = {
135+ 'googlemail.com' => 'gmail.com' ,
136+ 'pm.me' => 'protonmail.com' ,
137+ 'proton.me' => 'protonmail.com' ,
138+ 'yandex.by' => 'yandex.ru' ,
139+ 'yandex.com' => 'yandex.ru' ,
140+ 'yandex.kz' => 'yandex.ru' ,
141+ 'yandex.ua' => 'yandex.ru' ,
142+ 'ya.ru' => 'yandex.ru' ,
143+ } . freeze
144+ private_constant :EQUIVALENT_DOMAINS
145+
146+ FASTMAIL_DOMAINS = {
147+ '123mail.org' => true ,
148+ '150mail.com' => true ,
149+ '150ml.com' => true ,
150+ '16mail.com' => true ,
151+ '2-mail.com' => true ,
152+ '4email.net' => true ,
153+ '50mail.com' => true ,
154+ 'airpost.net' => true ,
155+ 'allmail.net' => true ,
156+ 'bestmail.us' => true ,
157+ 'cluemail.com' => true ,
158+ 'elitemail.org' => true ,
159+ 'emailcorner.net' => true ,
160+ 'emailengine.net' => true ,
161+ 'emailengine.org' => true ,
162+ 'emailgroups.net' => true ,
163+ 'emailplus.org' => true ,
164+ 'emailuser.net' => true ,
165+ 'eml.cc' => true ,
166+ 'f-m.fm' => true ,
167+ 'fast-email.com' => true ,
168+ 'fast-mail.org' => true ,
169+ 'fastem.com' => true ,
170+ 'fastemail.us' => true ,
171+ 'fastemailer.com' => true ,
172+ 'fastest.cc' => true ,
173+ 'fastimap.com' => true ,
174+ 'fastmail.cn' => true ,
175+ 'fastmail.co.uk' => true ,
176+ 'fastmail.com' => true ,
177+ 'fastmail.com.au' => true ,
178+ 'fastmail.de' => true ,
179+ 'fastmail.es' => true ,
180+ 'fastmail.fm' => true ,
181+ 'fastmail.fr' => true ,
182+ 'fastmail.im' => true ,
183+ 'fastmail.in' => true ,
184+ 'fastmail.jp' => true ,
185+ 'fastmail.mx' => true ,
186+ 'fastmail.net' => true ,
187+ 'fastmail.nl' => true ,
188+ 'fastmail.org' => true ,
189+ 'fastmail.se' => true ,
190+ 'fastmail.to' => true ,
191+ 'fastmail.tw' => true ,
192+ 'fastmail.uk' => true ,
193+ 'fastmail.us' => true ,
194+ 'fastmailbox.net' => true ,
195+ 'fastmessaging.com' => true ,
196+ 'fea.st' => true ,
197+ 'fmail.co.uk' => true ,
198+ 'fmailbox.com' => true ,
199+ 'fmgirl.com' => true ,
200+ 'fmguy.com' => true ,
201+ 'ftml.net' => true ,
202+ 'h-mail.us' => true ,
203+ 'hailmail.net' => true ,
204+ 'imap-mail.com' => true ,
205+ 'imap.cc' => true ,
206+ 'imapmail.org' => true ,
207+ 'inoutbox.com' => true ,
208+ 'internet-e-mail.com' => true ,
209+ 'internet-mail.org' => true ,
210+ 'internetemails.net' => true ,
211+ 'internetmailing.net' => true ,
212+ 'jetemail.net' => true ,
213+ 'justemail.net' => true ,
214+ 'letterboxes.org' => true ,
215+ 'mail-central.com' => true ,
216+ 'mail-page.com' => true ,
217+ 'mailandftp.com' => true ,
218+ 'mailas.com' => true ,
219+ 'mailbolt.com' => true ,
220+ 'mailc.net' => true ,
221+ 'mailcan.com' => true ,
222+ 'mailforce.net' => true ,
223+ 'mailftp.com' => true ,
224+ 'mailhaven.com' => true ,
225+ 'mailingaddress.org' => true ,
226+ 'mailite.com' => true ,
227+ 'mailmight.com' => true ,
228+ 'mailnew.com' => true ,
229+ 'mailsent.net' => true ,
230+ 'mailservice.ms' => true ,
231+ 'mailup.net' => true ,
232+ 'mailworks.org' => true ,
233+ 'ml1.net' => true ,
234+ 'mm.st' => true ,
235+ 'myfastmail.com' => true ,
236+ 'mymacmail.com' => true ,
237+ 'nospammail.net' => true ,
238+ 'ownmail.net' => true ,
239+ 'petml.com' => true ,
240+ 'postinbox.com' => true ,
241+ 'postpro.net' => true ,
242+ 'proinbox.com' => true ,
243+ 'promessage.com' => true ,
244+ 'realemail.net' => true ,
245+ 'reallyfast.biz' => true ,
246+ 'reallyfast.info' => true ,
247+ 'rushpost.com' => true ,
248+ 'sent.as' => true ,
249+ 'sent.at' => true ,
250+ 'sent.com' => true ,
251+ 'speedpost.net' => true ,
252+ 'speedymail.org' => true ,
253+ 'ssl-mail.com' => true ,
254+ 'swift-mail.com' => true ,
255+ 'the-fastest.net' => true ,
256+ 'the-quickest.com' => true ,
257+ 'theinternetemail.com' => true ,
258+ 'veryfast.biz' => true ,
259+ 'veryspeedy.net' => true ,
260+ 'warpmail.net' => true ,
261+ 'xsmail.com' => true ,
262+ 'yepmail.net' => true ,
263+ 'your-mail.com' => true ,
264+ } . freeze
265+ private_constant :FASTMAIL_DOMAINS
266+
267+ YAHOO_DOMAINS = {
268+ 'y7mail.com' => true ,
269+ 'yahoo.at' => true ,
270+ 'yahoo.be' => true ,
271+ 'yahoo.bg' => true ,
272+ 'yahoo.ca' => true ,
273+ 'yahoo.cl' => true ,
274+ 'yahoo.co.id' => true ,
275+ 'yahoo.co.il' => true ,
276+ 'yahoo.co.in' => true ,
277+ 'yahoo.co.kr' => true ,
278+ 'yahoo.co.nz' => true ,
279+ 'yahoo.co.th' => true ,
280+ 'yahoo.co.uk' => true ,
281+ 'yahoo.co.za' => true ,
282+ 'yahoo.com' => true ,
283+ 'yahoo.com.ar' => true ,
284+ 'yahoo.com.au' => true ,
285+ 'yahoo.com.br' => true ,
286+ 'yahoo.com.co' => true ,
287+ 'yahoo.com.hk' => true ,
288+ 'yahoo.com.hr' => true ,
289+ 'yahoo.com.mx' => true ,
290+ 'yahoo.com.my' => true ,
291+ 'yahoo.com.pe' => true ,
292+ 'yahoo.com.ph' => true ,
293+ 'yahoo.com.sg' => true ,
294+ 'yahoo.com.tr' => true ,
295+ 'yahoo.com.tw' => true ,
296+ 'yahoo.com.ua' => true ,
297+ 'yahoo.com.ve' => true ,
298+ 'yahoo.com.vn' => true ,
299+ 'yahoo.cz' => true ,
300+ 'yahoo.de' => true ,
301+ 'yahoo.dk' => true ,
302+ 'yahoo.ee' => true ,
303+ 'yahoo.es' => true ,
304+ 'yahoo.fi' => true ,
305+ 'yahoo.fr' => true ,
306+ 'yahoo.gr' => true ,
307+ 'yahoo.hu' => true ,
308+ 'yahoo.ie' => true ,
309+ 'yahoo.in' => true ,
310+ 'yahoo.it' => true ,
311+ 'yahoo.lt' => true ,
312+ 'yahoo.lv' => true ,
313+ 'yahoo.nl' => true ,
314+ 'yahoo.no' => true ,
315+ 'yahoo.pl' => true ,
316+ 'yahoo.pt' => true ,
317+ 'yahoo.ro' => true ,
318+ 'yahoo.se' => true ,
319+ 'yahoo.sk' => true ,
320+ 'ymail.com' => true ,
321+ } . freeze
322+ private_constant :YAHOO_DOMAINS
323+
116324 def clean_domain ( domain )
117325 domain = domain . strip
118326
@@ -121,10 +329,19 @@ def clean_domain(domain)
121329
122330 domain = SimpleIDN . to_ascii ( domain )
123331
332+ domain . sub! ( /(?:\. com){2,}$/ , '.com' )
333+ domain . sub! ( /\. com[^.]+$/ , '.com' )
334+ domain . sub! ( /(?:\. (?:com|c[a-z]{1,2}m|co[ln]|[dsvx]o[mn]|))$/ , '.com' )
335+ domain . sub! ( /^\d +(?:gmail?\. com)$/ , 'gmail.com' )
336+
124337 if TYPO_DOMAINS . key? ( domain )
125338 domain = TYPO_DOMAINS [ domain ]
126339 end
127340
341+ if EQUIVALENT_DOMAINS . key? ( domain )
342+ domain = EQUIVALENT_DOMAINS [ domain ]
343+ end
344+
128345 domain
129346 end
130347 end
0 commit comments