@@ -100,7 +100,7 @@ def handleShortResponse(
100100 dl : List ,
101101 whois_str : str ,
102102 verbose : bool = False ,
103- ):
103+ ): # returns None or raises one of (WhoisQuotaExceeded, FailedParsingWhoisOutput)
104104 if verbose :
105105 d = "." .join (dl )
106106 print (f"line count < 5:: { tld } { d } { whois_str } " , file = sys .stderr )
@@ -150,14 +150,35 @@ def handleShortResponse(
150150 # ---------------------------------
151151 raise FailedParsingWhoisOutput (whois_str )
152152
def doDnsSec(whois_str: str) -> bool:
    """Report whether the whois output flags the domain as DNSSEC-signed.

    Only the text following the first ``DNSSEC:`` marker is inspected, and
    only up to the end of that line.

    Args:
        whois_str: Raw whois response text.

    Returns:
        True when the value on the ``DNSSEC:`` line is exactly
        ``signedDelegation`` or ``yes`` (surrounding whitespace ignored),
        otherwise False -- including when no ``DNSSEC:`` marker is present.
    """
    fields = whois_str.split("DNSSEC:")
    if len(fields) < 2:
        return False

    # Cut at the first newline only; splitting on newline-plus-space would
    # leave the following lines attached whenever the next line is not
    # indented, and the comparison below would then never match.
    value = fields[1].split("\n", 1)[0].strip()
    return value in ("signedDelegation", "yes")
160160
def doSourceIana(whois_str: str, verbose: bool = False) -> str:
    """Drop the leading IANA section from a whois response.

    This is the hook for the permanent IANA domains such as example.com
    and example.net.

    Args:
        whois_str: Raw whois response text.
        verbose: When True, write a trace message to stderr. The message is
            emitted unconditionally; the function does not first check that
            the marker is present.

    Returns:
        The text after ``source: IANA`` when that marker occurs exactly once
        (often this is just whitespace); otherwise ``whois_str`` unchanged.
    """
    if verbose:
        print("i have seen source: IANA", file=sys.stderr)

    parts = whois_str.split("source: IANA")
    if len(parts) == 2:
        return parts[1]
    return whois_str
172+
def doIfServerNameLookForDomainName(whois_str: str, verbose: bool = False) -> str:
    """Skip ahead to the ``Domain Name:`` section of an old-style response.

    Responses containing a ``Server Name:`` line (not often available
    anymore) are trimmed so that they start at the first ``Domain Name:``.

    Args:
        whois_str: Raw whois response text.
        verbose: When True, write a trace message to stderr once a
            ``Server Name:`` line has been seen.

    Returns:
        ``whois_str`` starting at the first ``Domain Name:`` when a
        ``Server Name:`` line is present and ``Domain Name:`` can be found;
        otherwise ``whois_str`` unchanged.
    """
    # "Server Name:" is matched case-insensitively; "Domain Name:" below is
    # an exact, case-sensitive search.
    if re.search(r"Server Name:\s?(.+)", whois_str, re.IGNORECASE) is None:
        return whois_str

    if verbose:
        print("i have seen Server Name:, looking for Domain Name:", file=sys.stderr)

    start = whois_str.find("Domain Name:")
    if start < 0:
        # str.find returns -1 when the marker is missing; slicing with it
        # would keep only the final character of the response.
        return whois_str
    return whois_str[start:]
181+
161182def do_parse (
162183 whois_str : str ,
163184 tld : str ,
@@ -178,27 +199,20 @@ def do_parse(
178199
179200 r ["DNSSEC" ] = doDnsSec (whois_str ) # check the status of DNSSEC
180201
181- # this is mostly not available in many tld's anymore, should be investigated
182- # split whois_str to remove first IANA part showing info for TLD only
183- whois_splitted = whois_str .split ("source: IANA" )
184- if len (whois_splitted ) == 2 :
185- whois_str = whois_splitted [1 ]
202+ if "source: IANA" in whois_str : # prepare for handling historical IANA domains
203+ whois_str = doSourceIana (whois_str , verbose )
186204
187- # also not available for many modern tld's
188- sn = re .findall (r"Server Name:\s?(.+)" , whois_str , re .IGNORECASE )
189- if sn :
190- whois_str = whois_str [whois_str .find ("Domain Name:" ) :]
205+ if "Server Name" in whois_str : # handle old type Server Name (not very common anymore)
206+ whois_str = doIfServerNameLookForDomainName (whois_str , verbose )
191207
192- # return TLD_RE["com"] as default if tld not exists in TLD_RE
193- for k , v in TLD_RE .get (tld , TLD_RE ["com" ]).items ():
194- if k .startswith ("_" ):
195- # skip meta element like: _server or _privateRegistry
208+ for k , v in TLD_RE .get (tld , TLD_RE ["com" ]).items (): # use TLD_RE["com"] as default if a regex is missing
209+ if k .startswith ("_" ): # skip meta element like: _server or _privateRegistry
196210 continue
197211
212+ # Historical: here we use 'empty string' as default, not None
198213 if v is None :
199214 r [k ] = ["" ]
200215 else :
201216 r [k ] = v .findall (whois_str ) or ["" ]
202- # print("DEBUG: Keyval = " + str(r[k]))
203217
204218 return r
0 commit comments