22import re , sys , datetime , csv , pkgutil
33from . import net , shared
44
5- try :
5+ try :
66 from io import StringIO
77except ImportError :
88 from cStringIO import StringIO
@@ -25,13 +25,13 @@ def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False):
2525 destination [line [abbrev_key ]] = line [name_key ]
2626 except IOError as e :
2727 pass
28-
28+
2929airports = {}
3030countries = {}
3131states_au = {}
3232states_us = {}
3333states_ca = {}
34-
34+
3535try :
3636 reader = csv .reader (pkgdata ("airports.dat" ).splitlines ())
3737
@@ -50,7 +50,7 @@ def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False):
5050
def precompile_regexes(source, flags=0):
    """Compile every pattern in ``source`` and return the results as a list.

    ``source`` is iterated once, in order; each item is handed to
    ``re.compile`` together with ``flags`` (0 means no extra flags).
    """
    compiled = []
    for pattern in source:
        compiled.append(re.compile(pattern, flags))
    return compiled
53-
53+
5454grammar = {
5555 "_data" : {
5656 'id' : ['Domain ID:[ ]*(?P<val>.+)' ],
@@ -157,7 +157,7 @@ def precompile_regexes(source, flags=0):
157157 '(?<=[ .]{2})(?P<val>[a-z0-9-]+\.d?ns[0-9]*\.([a-z0-9-]+\.)+[a-z0-9]+)' ,
158158 '(?<=[ .]{2})(?P<val>([a-z0-9-]+\.)+[a-z0-9]+)(\s+([0-9]{1,3}\.){3}[0-9]{1,3})' ,
159159 '(?<=[ .]{2})[^a-z0-9.-](?P<val>d?ns\.([a-z0-9-]+\.)+[a-z0-9]+)' ,
160- 'Nserver:\s*(?P<val>.+)' ],
160+ 'Nserver:\s*(?P<val>.+)' ],
161161 'emails' : ['(?P<val>[\w.-]+@[\w.-]+\.[\w]{2,6})' , # Really need to fix this, much longer TLDs now exist...
162162 '(?P<val>[\w.-]+\sAT\s[\w.-]+\sDOT\s[\w]{2,6})' ]
163163 },
@@ -235,7 +235,7 @@ def preprocess_regex(regex):
235235 "Domain Owner:\n \t (?P<organization>.+)\n \n [\s\S]*?(?:Registrant Contact:\n \t (?P<name>.+))?\n \n Registrant(?:'s)? (?:a|A)ddress:(?:\n \t (?P<street1>.+)\n (?:\t (?P<street2>.+)\n )?(?:\t (?P<street3>.+)\n )?\t (?P<city>.+)\n \t (?P<postalcode>.+))?\n \t (?P<country>.+)(?:\n \t (?P<phone>.+) \(Phone\)\n \t (?P<fax>.+) \(FAX\)\n \t (?P<email>.+))?\n \n " , # .ac.uk - what a mess...
236236 "Registrant ID: (?P<handle>.+)\n Registrant: (?P<name>.+)\n Registrant Contact Email: (?P<email>.+)" , # .cn (CNNIC)
237237 "Registrant contact:\n (?P<name>.+)\n (?P<street>.*)\n (?P<city>.+), (?P<state>.+) (?P<postalcode>.+) (?P<country>.+)\n \n " , # Fabulous.com
238- "registrant-name:\s*(?P<name>.+)\n (registrant-organization:\s*(?P<organization>.*)\n )?registrant-type:\s*(?P<type>.+)\n registrant-address:\s*(?P<street>.+)\n registrant-postcode:\s*(?P<postalcode>.+)\n registrant-city:\s*(?P<city>.+)\n registrant-country:\s*(?P<country>.+)\n (?:registrant-phone:\s*(?P<phone>.+)\n )?(?:registrant-email:\s*(?P<email>.+)\n )?" , # Hetzner
238+ "registrant-name:\s*(?P<name>.+)\n (?: registrant-organization:\s*(?P<organization>.*)\n )?registrant-type:\s*(?P<type>.+)\n registrant-address:\s*(?P<street>.+)\n registrant-postcode:\s*(?P<postalcode>.+)\n registrant-city:\s*(?P<city>.+)\n registrant-country:\s*(?P<country>.+)\n (?:registrant-phone:\s*(?P<phone>.+)\n )?(?:registrant-email:\s*(?P<email>.+)\n )?" , # Hetzner
239239 "Registrant Contact Information :[ ]*\n [ ]+(?P<firstname>.*)\n [ ]+(?P<lastname>.*)\n [ ]+(?P<organization>.*)\n [ ]+(?P<email>.*)\n [ ]+(?P<street>.*)\n [ ]+(?P<city>.*)\n [ ]+(?P<postalcode>.*)\n [ ]+(?P<phone>.*)\n [ ]+(?P<fax>.*)\n \n " , # GAL Communication
240240 "Contact Information : For Customer # [0-9]+[ ]*\n [ ]+(?P<firstname>.*)\n [ ]+(?P<lastname>.*)\n [ ]+(?P<organization>.*)\n [ ]+(?P<email>.*)\n [ ]+(?P<street>.*)\n [ ]+(?P<city>.*)\n [ ]+(?P<postalcode>.*)\n [ ]+(?P<phone>.*)\n [ ]+(?P<fax>.*)\n \n " , # GAL Communication alternative (private WHOIS) format?
241241 "Registrant:\n Name: (?P<name>.+)\n City: (?P<city>.+)\n State: (?P<state>.+)\n Country: (?P<country>.+)\n " , # Akky (.com.mx)
@@ -271,7 +271,7 @@ def preprocess_regex(regex):
271271 "Technical Contacts\n Name: (?P<name>.+)\n (?: Organization: (?P<organization>.+)\n )? ContactID: (?P<handle>.+)\n (?: Address: (?P<street1>.+)\n (?: (?P<street2>.+)\n (?: (?P<street3>.+)\n )?)? (?P<city>.+)\n (?P<postalcode>.+)\n (?P<state>.+)\n (?P<country>.+)\n )?(?: Created: (?P<creationdate>.+)\n )?(?: Last Update: (?P<changedate>.+)\n )?" , # nic.it // NOTE: Why does this say 'Contacts'? Can it have multiple?
272272 "Tech Name[.]* (?P<name>.*)\n Tech Address[.]* (?P<street1>.*)\n Tech Address[.]* (?P<street2>.*)\n (?: Tech Address[.]* (?P<street3>.*)\n )? Tech Address[.]* (?P<city>.*)\n Tech Address[.]* (?P<postalcode>.*)\n Tech Address[.]* (?P<state>.*)\n Tech Address[.]* (?P<country>.*)\n Tech Email[.]* (?P<email>.*)\n Tech Phone[.]* (?P<phone>.*)\n Tech Fax[.]* (?P<fax>.*)" , # Melbourne IT
273273 "Technical contact:\n (?: (?P<organization>.+)\n )? (?P<name>.+)\n (?P<email>.+)\n (?P<street>.+)\n (?P<city>.+), (?P<state>.+) (?P<postalcode>.+) (?P<country>.+)\n Phone: (?P<phone>.*)\n Fax: (?P<fax>.*)\n " , # Fabulous.com
274- "tech-c-name:\s*(?P<name>.+)\n (tech-c-organization:\s*(?P<organization>.*)\n )?tech-c-type:\s*(?P<type>.+)\n tech-c-address:\s*(?P<street>.+)\n tech-c-postcode:\s*(?P<postalcode>.+)\n tech-c-city:\s*(?P<city>.+)\n tech-c-country:\s*(?P<country>.+)\n (?:tech-c-phone:\s*(?P<phone>.+)\n )?(?:tech-c-email:\s*(?P<email>.+)\n )?" , # Hetzner
274+ "tech-c-name:\s*(?P<name>.+)\n (?: tech-c-organization:\s*(?P<organization>.*)\n )?tech-c-type:\s*(?P<type>.+)\n tech-c-address:\s*(?P<street>.+)\n tech-c-postcode:\s*(?P<postalcode>.+)\n tech-c-city:\s*(?P<city>.+)\n tech-c-country:\s*(?P<country>.+)\n (?:tech-c-phone:\s*(?P<phone>.+)\n )?(?:tech-c-email:\s*(?P<email>.+)\n )?" , # Hetzner
275275 "Admin Contact Information :[ ]*\n [ ]+(?P<firstname>.*)\n [ ]+(?P<lastname>.*)\n [ ]+(?P<organization>.*)\n [ ]+(?P<email>.*)\n [ ]+(?P<street>.*)\n [ ]+(?P<city>.*)\n [ ]+(?P<postalcode>.*)\n [ ]+(?P<phone>.*)\n [ ]+(?P<fax>.*)\n \n " , # GAL Communication
276276 " Technical contact:\n (?P<name>.+)\n (?P<organization>.*)\n (?P<street>.+)\n (?P<city>.+) (?P<state>\S+),[ ]+(?P<postalcode>.+)\n (?P<country>.+)\n (?P<email>.+)\n (?P<phone>.*)\n (?P<fax>.*)" , # .am
277277 "Technical:\n \s*Name:\s*(?P<name>.*)\n \s*Organisation:\s*(?P<organization>.*)\n \s*Language:.*\n \s*Phone:\s*(?P<phone>.*)\n \s*Fax:\s*(?P<fax>.*)\n \s*Email:\s*(?P<email>.*)\n " , # EURid
@@ -304,7 +304,7 @@ def preprocess_regex(regex):
304304 "Admin Contact\n Name: (?P<name>.+)\n (?: Organization: (?P<organization>.+)\n )? ContactID: (?P<handle>.+)\n (?: Address: (?P<street1>.+)\n (?: (?P<street2>.+)\n (?: (?P<street3>.+)\n )?)? (?P<city>.+)\n (?P<postalcode>.+)\n (?P<state>.+)\n (?P<country>.+)\n )?(?: Created: (?P<creationdate>.+)\n )?(?: Last Update: (?P<changedate>.+)\n )?" , # nic.it
305305 "Admin Name[.]* (?P<name>.*)\n Admin Address[.]* (?P<street1>.*)\n Admin Address[.]* (?P<street2>.*)\n (?: Admin Address[.]* (?P<street3>.*)\n )? Admin Address[.]* (?P<city>.*)\n Admin Address[.]* (?P<postalcode>.*)\n Admin Address[.]* (?P<state>.*)\n Admin Address[.]* (?P<country>.*)\n Admin Email[.]* (?P<email>.*)\n Admin Phone[.]* (?P<phone>.*)\n Admin Fax[.]* (?P<fax>.*)" , # Melbourne IT
306306 "Administrative contact:\n (?: (?P<organization>.+)\n )? (?P<name>.+)\n (?P<email>.+)\n (?P<street>.+)\n (?P<city>.+), (?P<state>.+) (?P<postalcode>.+) (?P<country>.+)\n Phone: (?P<phone>.*)\n Fax: (?P<fax>.*)\n " , # Fabulous.com
307- "admin-c-name:\s*(?P<name>.+)\n (admin-c-organization:\s*(?P<organization>.*)\n )?admin-c-type:\s*(?P<type>.+)\n admin-c-address:\s*(?P<street>.+)\n admin-c-postcode:\s*(?P<postalcode>.+)\n admin-c-city:\s*(?P<city>.+)\n admin-c-country:\s*(?P<country>.+)\n (?:admin-c-phone:\s*(?P<phone>.+)\n )?(?:admin-c-email:\s*(?P<email>.+)\n )?" , # Hetzner
307+ "admin-c-name:\s*(?P<name>.+)\n (?: admin-c-organization:\s*(?P<organization>.*)\n )?admin-c-type:\s*(?P<type>.+)\n admin-c-address:\s*(?P<street>.+)\n admin-c-postcode:\s*(?P<postalcode>.+)\n admin-c-city:\s*(?P<city>.+)\n admin-c-country:\s*(?P<country>.+)\n (?:admin-c-phone:\s*(?P<phone>.+)\n )?(?:admin-c-email:\s*(?P<email>.+)\n )?" , # Hetzner
308308 "Tech Contact Information :[ ]*\n [ ]+(?P<firstname>.*)\n [ ]+(?P<lastname>.*)\n [ ]+(?P<organization>.*)\n [ ]+(?P<email>.*)\n [ ]+(?P<street>.*)\n [ ]+(?P<city>.*)\n [ ]+(?P<postalcode>.*)\n [ ]+(?P<phone>.*)\n [ ]+(?P<fax>.*)\n \n " , # GAL Communication
309309 " Administrative contact:\n (?P<name>.+)\n (?P<organization>.*)\n (?P<street>.+)\n (?P<city>.+) (?P<state>\S+),[ ]+(?P<postalcode>.+)\n (?P<country>.+)\n (?P<email>.+)\n (?P<phone>.*)\n (?P<fax>.*)" , # .am
310310 "Administrative Contact:\n Name: (?P<name>.+)\n City: (?P<city>.+)\n State: (?P<state>.+)\n Country: (?P<country>.+)\n " , # Akky (.com.mx)
@@ -553,7 +553,7 @@ def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_
553553 data ["nameservers" ].append (match .strip ())
554554 except KeyError as e :
555555 data ["nameservers" ] = [match .strip ()]
556-
556+
557557
558558 data ["contacts" ] = parse_registrants (raw_data , never_query_handles , handle_server )
559559
@@ -645,7 +645,7 @@ def normalize_data(data, normalized):
645645 for country , source in (("united states" , states_us ), ("australia" , states_au ), ("canada" , states_ca )):
646646 if country in contact ["country" ].lower () and contact ["state" ] in source :
647647 contact ["state" ] = source [contact ["state" ]]
648-
648+
649649 for key in ("email" ,):
650650 if key in contact and contact [key ] is not None and (normalized == True or key in normalized ):
651651 if is_string (contact [key ]):
@@ -660,7 +660,7 @@ def normalize_data(data, normalized):
660660 for key in ("city" , "organization" , "state" , "country" ):
661661 if key in contact and contact [key ] is not None and (normalized == True or key in normalized ):
662662 contact [key ] = normalize_name (contact [key ], abbreviation_threshold = 3 , length_threshold = 3 )
663-
663+
664664 if "name" in contact and "organization" not in contact :
665665 lines = [x .strip () for x in contact ["name" ].splitlines ()]
666666 new_lines = []
@@ -674,10 +674,10 @@ def normalize_data(data, normalized):
674674 contact ["name" ] = "\n " .join (lines )
675675 else :
676676 del contact ["name" ]
677-
677+
678678 if len (new_lines ) > 0 :
679679 contact ["organization" ] = "\n " .join (new_lines )
680-
680+
681681 if "street" in contact and "organization" not in contact :
682682 lines = [x .strip () for x in contact ["street" ].splitlines ()]
683683 if len (lines ) > 1 :
@@ -686,7 +686,7 @@ def normalize_data(data, normalized):
686686 contact ["organization" ] = lines [0 ]
687687 contact ["street" ] = "\n " .join (lines [1 :])
688688 break
689-
689+
690690 for key in list (contact .keys ()):
691691 try :
692692 contact [key ] = contact [key ].strip (", " )
@@ -831,10 +831,10 @@ def remove_suffixes(data):
831831 # Removes everything before and after the first non-whitespace continuous string.
832832 # Used to get rid of IP suffixes for nameservers.
833833 cleaned_list = []
834-
834+
835835 for entry in data :
836836 cleaned_list .append (re .search ("([^\s]+)\s*[\s]*" , entry ).group (1 ).lstrip ())
837-
837+
838838 return cleaned_list
839839
840840def parse_registrants (data , never_query_handles = True , handle_server = "" ):
@@ -911,7 +911,7 @@ def parse_registrants(data, never_query_handles=True, handle_server=""):
911911 elif category == "admin" :
912912 admin_contact = data_reference
913913 break
914-
914+
915915 # Post-processing
916916 for obj in (registrant , tech_contact , billing_contact , admin_contact ):
917917 if obj is not None :
@@ -986,18 +986,18 @@ def fetch_nic_contact(handle, lookup_server):
986986 response = net .get_whois_raw (handle , lookup_server )
987987 response = [segment .replace ("\r " , "" ) for segment in response ] # Carriage returns are the devil
988988 results = parse_nic_contact (response )
989-
989+
990990 if len (results ) > 0 :
991991 return results [0 ]
992992 else :
993993 raise shared .WhoisException ("No contact data found in the response." )
994-
994+
def parse_nic_contact(data):
    """Collect the named groups of every NIC contact match found in ``data``.

    Each pattern in the module-level ``nic_contact_regexes`` is applied, via
    ``re.finditer``, to each segment of ``data``. Results are ordered by
    pattern first, then by segment, then by match position within the segment.

    Returns a list with one ``groupdict()`` per match (empty if nothing matched).
    """
    return [
        found.groupdict()
        for pattern in nic_contact_regexes
        for chunk in data
        for found in re.finditer(pattern, chunk)
    ]
0 commit comments