@@ -63,44 +63,34 @@ def cleanupWhoisResponse(
6363 verbose : bool = False ,
6464 with_cleanup_results : bool = False ,
6565) -> str :
66-
67- if 0 :
68- if verbose :
69- print (f"BEFORE cleanup: \n { response } " , file = sys .stderr )
66+ tmp2 = []
7067
7168 tmp : List = response .split ("\n " )
72-
73- tmp2 = []
7469 for line in tmp :
7570 # some servers respond with: % Quota exceeded in the comment section (lines starting with %)
7671 if "quota exceeded" in line .lower ():
7772 raise WhoisQuotaExceeded (response )
7873
79- if with_cleanup_results is True and line .startswith ("%" ):
74+ if with_cleanup_results is True and line .startswith ("%" ): # only remove if requested
8075 continue
8176
82- if "REDACTED FOR PRIVACY" in line :
77+ if "REDACTED FOR PRIVACY" in line : # these lines contibute nothing so ignore
8378 continue
8479
85- if line .startswith ("Terms of Use:" ):
80+ if line .startswith ("Terms of Use:" ): # these lines contibute nothing so ignore
8681 continue
8782
8883 tmp2 .append (line )
8984
90- response = "\n " .join (tmp2 )
91- if 0 :
92- if verbose :
93- print (f"AFTER cleanup: \n { response } " , file = sys .stderr )
94-
95- return response
85+ return "\n " .join (tmp2 )
9686
9787
9888def handleShortResponse (
9989 tld : str ,
10090 dl : List ,
10191 whois_str : str ,
10292 verbose : bool = False ,
103- ): # returns None or raises one of (WhoisQuotaExceeded, FailedParsingWhoisOutput)
93+ ): # returns None or raises one of (WhoisQuotaExceeded, FailedParsingWhoisOutput)
10494 if verbose :
10595 d = "." .join (dl )
10696 print (f"line count < 5:: { tld } { d } { whois_str } " , file = sys .stderr )
@@ -150,6 +140,7 @@ def handleShortResponse(
150140 # ---------------------------------
151141 raise FailedParsingWhoisOutput (whois_str )
152142
143+
153144def doDnsSec (whois_str : str ) -> bool :
154145 whois_dnssec : Any = whois_str .split ("DNSSEC:" )
155146 if len (whois_dnssec ) >= 2 :
@@ -158,35 +149,79 @@ def doDnsSec(whois_str: str) -> bool:
158149 return True
159150 return False
160151
161- def doSourceIana (whois_str : str , verbose : bool = False ) -> str :
162- # here we can handle the example.com and example.net permanent IANA domains
163-
164- if verbose :
165- msg = f"i have seen source: IANA"
166- print (msg , file = sys .stderr )
167-
168- whois_splitted = whois_str .split ("source: IANA" )
169- if len (whois_splitted ) == 2 :
170- whois_str = whois_splitted [1 ] # often this is actually just whitespace
171- return whois_str
172152
def doIfServerNameLookForDomainName(whois_str: str, verbose: bool = False) -> str:
    """Skip the old-style "Server Name:" preamble of a whois response.

    If the response contains a "Server Name:" entry (matched case-insensitively),
    the text is trimmed so that it starts at the first "Domain Name:" marker.

    Args:
        whois_str: the whois response text.
        verbose: when True, report on stderr that a "Server Name:" entry was seen.

    Returns:
        The text starting at the first "Domain Name:", or the unchanged input when
        there is no "Server Name:" entry or no "Domain Name:" marker to trim to.
    """
    # not often available anymore
    if not re.search(r"Server Name:\s?(.+)", whois_str, re.IGNORECASE):
        return whois_str

    if verbose:
        msg = "i have seen Server Name:, looking for Domain Name:"
        print(msg, file=sys.stderr)

    start = whois_str.find("Domain Name:")
    if start < 0:
        # str.find() returns -1 on a miss; slicing from -1 would keep only the
        # last character and throw the rest of the response away
        return whois_str

    return whois_str[start:]
181161
162+
def doExtractPattensIanaFromWhoisString(tld: str, r: Dict, whois_str: str, verbose: bool = False):
    """Overlay IANA-format fields found in whois_str onto the result dict r.

    Looks for "domain:", "organisation:" and "created:" lines and, for each one
    that occurs, stores the list of captured values under "domain_name",
    "registrar" and "creation_date" respectively. Keys without a match are
    left untouched. With verbose set, tld and the matches go to stderr.

    Returns the same dict r that was passed in (it is updated in place).
    """
    # the field layout used by this kind of whois response
    iana_patterns = (
        ("domain_name", r"domain:\s?([^\n]+)"),
        ("registrar", r"organisation:\s?([^\n]+)"),
        ("creation_date", r"created:\s?([^\n]+)"),
    )

    for field, pattern in iana_patterns:
        matches = re.findall(pattern, whois_str)
        if not matches:
            continue  # keep whatever value r already holds for this field

        if verbose:
            print(tld, matches, file=sys.stderr)
        r[field] = matches

    return r
177+
178+
# The return annotation is quoted so it is never evaluated at import time and
# needs no extra typing import; the function returns a pair, not a plain str.
def doSourceIana(tld: str, r: Dict, whois_str: str, verbose: bool = False) -> "tuple[str, Optional[Dict[str, Any]]]":
    """Handle a whois response that contains the marker "source: IANA".

    Args:
        tld: the top level domain, passed on to the pattern extraction helpers.
        r: the result dict to fill when the response is parsed as an IANA domain.
        whois_str: the whois response text.
        verbose: when True, print progress messages to stderr.

    Returns:
        A pair (text, result):
        - (text after the marker, None) when the response splits into exactly two
          parts on "source: IANA" and the second part is not only whitespace;
          the caller continues parsing with that text.
        - (whois_str, r) otherwise, with r filled by the default patterns and
          then by the IANA specific patterns.
    """
    # here we can handle the example.com and example.net permanent IANA domains

    if verbose:
        msg = "i have seen source: IANA"
        print(msg, file=sys.stderr)

    whois_splitted = whois_str.split("source: IANA")
    if len(whois_splitted) == 2 and whois_splitted[1].strip() != "":
        # if we see source: IANA and the part after is not only whitespace
        if verbose:
            msg = f"after IANA: {whois_splitted[1]}"
            print(msg, file=sys.stderr)

        return whois_splitted[1], None

    # try to parse this as an IANA domain, as the part after is only whitespace
    r = doExtractPattensFromWhoisString(tld, r, whois_str, verbose)  # set default values

    # now handle the actual format of this whois response
    r = doExtractPattensIanaFromWhoisString(tld, r, whois_str, verbose)

    return whois_str, r
202+
203+
def doExtractPattensFromWhoisString(tld: str, r: Dict, whois_str: str, verbose: bool = False):
    """Fill r with the matches of every regex registered for tld in TLD_RE.

    Keys starting with "_" are skipped. Every other key is set to the list
    returned by findall() on whois_str, or to [""] when the regex is None or
    nothing matched. Returns the same dict r that was passed in.
    """
    # use TLD_RE["com"] as default if a regex is missing
    patterns = TLD_RE.get(tld, TLD_RE["com"])

    for key, regex in patterns.items():
        if key.startswith("_"):  # skip meta element like: _server or _privateRegistry
            continue

        found = regex.findall(whois_str) if regex is not None else []
        # Historical: here we use 'empty string' as default, not None
        r[key] = found or [""]

    return r
216+
217+
182218def do_parse (
183219 whois_str : str ,
184220 tld : str ,
185221 dl : List [str ],
186222 verbose : bool = False ,
187223 with_cleanup_results = False ,
188224) -> Optional [Dict [str , Any ]]:
189- r : Dict [str , Any ] = {"tld" : tld }
190225
191226 whois_str = cleanupWhoisResponse (
192227 response = whois_str ,
@@ -197,22 +232,17 @@ def do_parse(
197232 if whois_str .count ("\n " ) < 5 :
198233 return handleShortResponse (tld , dl , whois_str , verbose )
199234
200- r ["DNSSEC" ] = doDnsSec (whois_str ) # check the status of DNSSEC
235+ r : Dict [str , Any ] = {
236+ "tld" : tld ,
237+ "DNSSEC" : doDnsSec (whois_str ),
238+ }
201239
202- if "source: IANA" in whois_str : # prepare for handling historical IANA domains
203- whois_str = doSourceIana (whois_str , verbose )
240+ if "source: IANA" in whois_str : # prepare for handling historical IANA domains
241+ whois_str , ianaDomain = doSourceIana (tld , r , whois_str , verbose )
242+ if ianaDomain is not None :
243+ return ianaDomain
204244
205- if "Server Name" in whois_str : # handle old type Server Name (not very common anymore)
245+ if "Server Name" in whois_str : # handle old type Server Name (not very common anymore)
206246 whois_str = doIfServerNameLookForDomainName (whois_str , verbose )
207247
208- for k , v in TLD_RE .get (tld , TLD_RE ["com" ]).items (): # use TLD_RE["com"] as default if a regex is missing
209- if k .startswith ("_" ): # skip meta element like: _server or _privateRegistry
210- continue
211-
212- # Historical: here we use 'empty string' as default, not None
213- if v is None :
214- r [k ] = ["" ]
215- else :
216- r [k ] = v .findall (whois_str ) or ["" ]
217-
218- return r
248+ return doExtractPattensFromWhoisString (tld , r , whois_str , verbose )
0 commit comments