Skip to content
This repository was archived by the owner on Feb 3, 2024. It is now read-only.

Commit 38fb737

Browse files
author
MooCow
authored
Merge pull request #233 from maarten-boot/temp2
add support for iana domains
2 parents 3e9f174 + 28b84cb commit 38fb737

File tree

3 files changed

+77
-45
lines changed

3 files changed

+77
-45
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ Raise an issue https://github.com/DannyCork/python-whois/issues/new
6262
* all tests from the original program are now files in the ./tests directory
6363
* tests can be run on all supported TLDs with -a or --all, and limited by regex with -r <pattern> or --reg=<pattern>
6464

65+
2022-11-04: maarten_boot
66+
* add support for the IANA domains example.com and example.net
67+
6568
## Support
6669
* Python 3.x is supported.
6770
* Python 2.x IS NOT supported.

test2.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import re
55
import getopt
66
import sys
7-
import json
87

98
Verbose = False
109
Failures = {}

whois/_2_parse.py

Lines changed: 74 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -63,44 +63,34 @@ def cleanupWhoisResponse(
6363
verbose: bool = False,
6464
with_cleanup_results: bool = False,
6565
) -> str:
66-
67-
if 0:
68-
if verbose:
69-
print(f"BEFORE cleanup: \n{response}", file=sys.stderr)
66+
tmp2 = []
7067

7168
tmp: List = response.split("\n")
72-
73-
tmp2 = []
7469
for line in tmp:
7570
# some servers respond with: % Quota exceeded in the comment section (lines starting with %)
7671
if "quota exceeded" in line.lower():
7772
raise WhoisQuotaExceeded(response)
7873

79-
if with_cleanup_results is True and line.startswith("%"):
74+
if with_cleanup_results is True and line.startswith("%"): # only remove if requested
8075
continue
8176

82-
if "REDACTED FOR PRIVACY" in line:
77+
if "REDACTED FOR PRIVACY" in line:  # these lines contribute nothing so ignore
8378
continue
8479

85-
if line.startswith("Terms of Use:"):
80+
if line.startswith("Terms of Use:"):  # these lines contribute nothing so ignore
8681
continue
8782

8883
tmp2.append(line)
8984

90-
response = "\n".join(tmp2)
91-
if 0:
92-
if verbose:
93-
print(f"AFTER cleanup: \n{response}", file=sys.stderr)
94-
95-
return response
85+
return "\n".join(tmp2)
9686

9787

9888
def handleShortResponse(
9989
tld: str,
10090
dl: List,
10191
whois_str: str,
10292
verbose: bool = False,
103-
): # returns None or raises one of (WhoisQuotaExceeded, FailedParsingWhoisOutput)
93+
): # returns None or raises one of (WhoisQuotaExceeded, FailedParsingWhoisOutput)
10494
if verbose:
10595
d = ".".join(dl)
10696
print(f"line count < 5:: {tld} {d} {whois_str}", file=sys.stderr)
@@ -150,6 +140,7 @@ def handleShortResponse(
150140
# ---------------------------------
151141
raise FailedParsingWhoisOutput(whois_str)
152142

143+
153144
def doDnsSec(whois_str: str) -> bool:
154145
whois_dnssec: Any = whois_str.split("DNSSEC:")
155146
if len(whois_dnssec) >= 2:
@@ -158,35 +149,79 @@ def doDnsSec(whois_str: str) -> bool:
158149
return True
159150
return False
160151

161-
def doSourceIana(whois_str: str, verbose: bool = False) -> str:
162-
# here we can handle the example.com and example.net permanent IANA domains
163-
164-
if verbose:
165-
msg = f"i have seen source: IANA"
166-
print(msg, file=sys.stderr)
167-
168-
whois_splitted = whois_str.split("source: IANA")
169-
if len(whois_splitted) == 2:
170-
whois_str = whois_splitted[1] # often this is actually just whitespace
171-
return whois_str
172152

173153
def doIfServerNameLookForDomainName(whois_str: str, verbose: bool = False) -> str:
174154
# not often available anymore
175155
if re.findall(r"Server Name:\s?(.+)", whois_str, re.IGNORECASE):
176156
if verbose:
177-
msg = f"i have seen Server Name:, looking for Domain Name:"
157+
msg = "i have seen Server Name:, looking for Domain Name:"
178158
print(msg, file=sys.stderr)
179159
whois_str = whois_str[whois_str.find("Domain Name:") :]
180160
return whois_str
181161

162+
163+
def doExtractPattensIanaFromWhoisString(tld: str, r: Dict, whois_str: str, verbose: bool = False):
164+
# now handle the actual format of this whois response
165+
iana = {
166+
"domain_name": r"domain:\s?([^\n]+)",
167+
"registrar": r"organisation:\s?([^\n]+)",
168+
"creation_date": r"created:\s?([^\n]+)",
169+
}
170+
for k, v in iana.items():
171+
zz = re.findall(v, whois_str)
172+
if zz:
173+
if verbose:
174+
print(tld, zz, file=sys.stderr)
175+
r[k] = zz
176+
return r
177+
178+
179+
def doSourceIana(tld: str, r: Dict, whois_str: str, verbose: bool = False) -> str:
180+
# here we can handle the example.com and example.net permanent IANA domains
181+
182+
if verbose:
183+
msg = "i have seen source: IANA"
184+
print(msg, file=sys.stderr)
185+
186+
whois_splitted = whois_str.split("source: IANA")
187+
if len(whois_splitted) == 2 and whois_splitted[1].strip() != "":
188+
# if we see source: IANA and the part after is not only whitespace
189+
if verbose:
190+
msg = f"after IANA: {whois_splitted[1]}"
191+
print(msg, file=sys.stderr)
192+
193+
return whois_splitted[1], None
194+
195+
# try to parse this as an IANA domain, as the part after "source: IANA" is only whitespace
196+
r = doExtractPattensFromWhoisString(tld, r, whois_str, verbose) # set default values
197+
198+
# now handle the actual format of this whois response
199+
r = doExtractPattensIanaFromWhoisString(tld, r, whois_str, verbose)
200+
201+
return whois_str, r
202+
203+
204+
def doExtractPattensFromWhoisString(tld: str, r: Dict, whois_str: str, verbose: bool = False):
205+
for k, v in TLD_RE.get(tld, TLD_RE["com"]).items(): # use TLD_RE["com"] as default if a regex is missing
206+
if k.startswith("_"): # skip meta element like: _server or _privateRegistry
207+
continue
208+
209+
# Historical: here we use 'empty string' as default, not None
210+
if v is None:
211+
r[k] = [""]
212+
else:
213+
r[k] = v.findall(whois_str) or [""]
214+
215+
return r
216+
217+
182218
def do_parse(
183219
whois_str: str,
184220
tld: str,
185221
dl: List[str],
186222
verbose: bool = False,
187223
with_cleanup_results=False,
188224
) -> Optional[Dict[str, Any]]:
189-
r: Dict[str, Any] = {"tld": tld}
190225

191226
whois_str = cleanupWhoisResponse(
192227
response=whois_str,
@@ -197,22 +232,17 @@ def do_parse(
197232
if whois_str.count("\n") < 5:
198233
return handleShortResponse(tld, dl, whois_str, verbose)
199234

200-
r["DNSSEC"] = doDnsSec(whois_str) # check the status of DNSSEC
235+
r: Dict[str, Any] = {
236+
"tld": tld,
237+
"DNSSEC": doDnsSec(whois_str),
238+
}
201239

202-
if "source: IANA" in whois_str: # prepare for handling historical IANA domains
203-
whois_str = doSourceIana(whois_str, verbose)
240+
if "source: IANA" in whois_str: # prepare for handling historical IANA domains
241+
whois_str, ianaDomain = doSourceIana(tld, r, whois_str, verbose)
242+
if ianaDomain is not None:
243+
return ianaDomain
204244

205-
if "Server Name" in whois_str: # handle old type Server Name (not very common anymore)
245+
if "Server Name" in whois_str: # handle old type Server Name (not very common anymore)
206246
whois_str = doIfServerNameLookForDomainName(whois_str, verbose)
207247

208-
for k, v in TLD_RE.get(tld, TLD_RE["com"]).items(): # use TLD_RE["com"] as default if a regex is missing
209-
if k.startswith("_"): # skip meta element like: _server or _privateRegistry
210-
continue
211-
212-
# Historical: here we use 'empty string' as default, not None
213-
if v is None:
214-
r[k] = [""]
215-
else:
216-
r[k] = v.findall(whois_str) or [""]
217-
218-
return r
248+
return doExtractPattensFromWhoisString(tld, r, whois_str, verbose)

0 commit comments

Comments
 (0)