@@ -48,6 +48,9 @@ def read_dataset(filename, destination, abbrev_key, name_key, is_dict=False):
4848read_dataset ("states_us.dat" , states_us , "abbreviation" , "name" , is_dict = True )
4949read_dataset ("states_ca.dat" , states_ca , "abbreviation" , "name" , is_dict = True )
5050
def precompile_regexes(source, flags=0):
    """Compile each pattern in ``source`` and return the results as a list.

    :param source: iterable of regular-expression patterns.
    :param flags: ``re`` flags passed to every ``re.compile`` call
        (defaults to ``0``, i.e. no flags).
    :returns: a new list of compiled pattern objects, in the same order
        as ``source``.
    """
    compiled = []
    for pattern in source:
        compiled.append(re.compile(pattern, flags))
    return compiled
53+
5154grammar = {
5255 "_data" : {
5356 'id' : ['Domain ID:[ ]*(?P<val>.+)' ],
@@ -389,6 +392,30 @@ def preprocess_regex(regex):
389392 r"\ss\.?a\.?r\.?l\.?($|\s)" ,
390393)
391394
# Replace the pattern tables with compiled patterns once, at import time.
# The matching code calls re.search()/re.match() on these entries without
# passing flags, so case-insensitivity is set here at compile time.
for _field in (
    "id",
    "status",
    "creation_date",
    "expiration_date",
    "updated_date",
    "registrar",
    "whois_server",
    "nameservers",
    "emails",
):
    grammar["_data"][_field] = precompile_regexes(grammar["_data"][_field], re.IGNORECASE)
del _field  # do not leave the loop variable behind as a module attribute

grammar["_dateformats"] = precompile_regexes(grammar["_dateformats"], re.IGNORECASE)

# Contact tables are compiled with the default flags (no IGNORECASE);
# only the organization table is compiled case-insensitively.
registrant_regexes = precompile_regexes(registrant_regexes)
tech_contact_regexes = precompile_regexes(tech_contact_regexes)
billing_contact_regexes = precompile_regexes(billing_contact_regexes)
admin_contact_regexes = precompile_regexes(admin_contact_regexes)
nic_contact_regexes = precompile_regexes(nic_contact_regexes)
organization_regexes = precompile_regexes(organization_regexes, re.IGNORECASE)

for _role in ("registrant", "tech", "admin", "billing"):
    nic_contact_references[_role] = precompile_regexes(nic_contact_references[_role])
del _role  # do not leave the loop variable behind as a module attribute
418+
392419if sys .version_info < (3 , 0 ):
393420 def is_string (data ):
394421 """Test for string with support for python 2."""
@@ -409,7 +436,7 @@ def parse_raw_whois(raw_data, normalized=[], never_query_handles=True, handle_se
409436 if (rule_key in data ) == False :
410437 for line in segment .splitlines ():
411438 for regex in rule_regexes :
412- result = re .search (regex , line , re . IGNORECASE )
439+ result = re .search (regex , line )
413440
414441 if result is not None :
415442 val = result .group ("val" ).strip ()
@@ -634,7 +661,7 @@ def normalize_data(data, normalized):
634661 new_lines = []
635662 for i , line in enumerate (lines ):
636663 for regex in organization_regexes :
637- if re .search (regex , line , re . IGNORECASE ):
664+ if re .search (regex , line ):
638665 new_lines .append (line )
639666 del lines [i ]
640667 break
@@ -650,7 +677,7 @@ def normalize_data(data, normalized):
650677 lines = [x .strip () for x in contact ["street" ].splitlines ()]
651678 if len (lines ) > 1 :
652679 for regex in organization_regexes :
653- if re .search (regex , lines [0 ], re . IGNORECASE ):
680+ if re .search (regex , lines [0 ]):
654681 contact ["organization" ] = lines [0 ]
655682 contact ["street" ] = "\n " .join (lines [1 :])
656683 break
@@ -714,7 +741,7 @@ def parse_dates(dates):
714741
715742 for date in dates :
716743 for rule in grammar ['_dateformats' ]:
717- result = re .match (rule , date , re . IGNORECASE )
744+ result = re .match (rule , date )
718745
719746 if result is not None :
720747 try :
0 commit comments