Skip to content
This repository was archived by the owner on Feb 3, 2024. It is now read-only.

Commit 3e9f174

Browse files
author
MooCow
authored
Merge pull request #232 from maarten-boot/temp2
move sections in do_parse to functions; prepare for handling old style IANA domain
2 parents cf0af34 + a4ca843 commit 3e9f174

File tree

4 files changed

+117
-76
lines changed

4 files changed

+117
-76
lines changed

test2.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,21 @@
44
import re
55
import getopt
66
import sys
7+
import json
78

89
Verbose = False
910
Failures = {}
1011
IgnoreReturncode = False
1112

1213

1314
def prepItem(d):
14-
print("-" * 80)
15-
print(d)
15+
print("")
16+
print(f"test domain: <<<<<<<<<< {d} >>>>>>>>>>>>>>>>>>>>")
17+
18+
19+
def xType(x):
    """Return the type name of ``x`` as shown between the quotes of ``str(type(x))``.

    For example ``<class 'str'>`` yields ``str``; the module prefix, when the
    type's string form carries one, is kept.
    """
    type_text = str(type(x))
    # the string form looks like "<class 'NAME'>": NAME is the second piece
    pieces = type_text.split("'")
    return pieces[1]
1622

1723

1824
def testItem(d):
@@ -27,9 +33,21 @@ def testItem(d):
2733
print("None")
2834
return
2935

36+
# the 3 date time items can be None if not present or a datetime string
37+
# dnssec is a bool
38+
# some strings are returned as '' when empty (status)
39+
# statuses can be an array of one empty string if no data
40+
41+
# not all values are always present; it mainly depends on what we see in the output of whois
42+
# if we return not None: the elements that are always there are domain_name, tld, dnssec
43+
3044
wd = w.__dict__
3145
for k, v in wd.items():
32-
print('%20s\t"%s"' % (k, v))
46+
ss = "%-18s %-17s "
47+
if isinstance(v, str):
48+
print((ss + "'%s'") % (k, xType(v), v))
49+
else:
50+
print((ss + "%s") % (k, xType(v), v))
3351

3452

3553
def errorItem(d, e, what="Generic"):
@@ -74,8 +92,8 @@ def testDomains(aList):
7492
errorItem(d, e, what="WhoisQuotaExceeded")
7593
except whois.WhoisPrivateRegistry as e:
7694
errorItem(d, e, what="WhoisPrivateRegistry")
77-
except Exception as e:
78-
errorItem(d, e, what="Generic")
95+
# except Exception as e:
96+
# errorItem(d, e, what="Generic")
7997

8098

8199
def getTestFileOne(fPath, fileData):

whois/_2_parse.py

Lines changed: 93 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,90 @@ def cleanupWhoisResponse(
9595
return response
9696

9797

98+
def handleShortResponse(
    tld: str,
    dl: List,
    whois_str: str,
    verbose: bool = False,
):  # returns None or raises one of (WhoisQuotaExceeded, FailedParsingWhoisOutput)
    """Classify a whois response that is too short to be parsed normally.

    The response text is stripped and lower-cased, then matched against known
    phrases, in this order:

    1. "domain does not exist" phrases      -> return None
    2. the word "error" anywhere            -> return None
    3. rate-limit / quota phrases           -> raise WhoisQuotaExceeded
    4. anything else                        -> raise FailedParsingWhoisOutput

    Args:
        tld: the top level domain being queried (only used in verbose output).
        dl: the domain split into its labels (joined with "." for verbose output).
        whois_str: the raw whois response text.
        verbose: when True, print the short response to stderr before classifying.

    Returns:
        None when the response indicates a missing domain or a generic error.

    Raises:
        WhoisQuotaExceeded: the response matches a rate-limit phrase.
        FailedParsingWhoisOutput: the response matches none of the known phrases.
    """
    if verbose:
        d = ".".join(dl)
        print(f"line count < 5:: {tld} {d} {whois_str}", file=sys.stderr)

    # NOTE: from here s is lowercase only
    s = whois_str.strip().lower()

    # ---------------------------------
    # phrases meaning the domain does not exist / is available
    none_strings = (
        "not found",
        "no entries found",
        "status: free",
        "no such domain",
        "the queried object does not exist",
        "domain you requested is not known",
        "status: available",
    )
    if any(phrase in s for phrase in none_strings):
        return None

    # ---------------------------------
    # is there any error string in the result
    # (checked before the quota phrases, so "error" wins if both are present)
    if "error" in s:
        return None

    # ---------------------------------
    # phrases meaning we are being rate limited;
    # "please try again" also covers the variant with a trailing period
    quota_strings = (
        "limit exceeded",
        "quota exceeded",
        "try again later",
        "please try again",
        "exceeded the maximum allowable number",
        "can temporarily not be answered",
        "queried interval is too short",
    )
    if any(phrase in s for phrase in quota_strings):
        raise WhoisQuotaExceeded(whois_str)

    # ---------------------------------
    # ToDo: Name or service not known

    # ---------------------------------
    raise FailedParsingWhoisOutput(whois_str)
152+
153+
def doDnsSec(whois_str: str) -> bool:
    """Report whether the whois text says the domain is DNSSEC signed.

    Looks at the text between the first "DNSSEC:" marker and the next marker
    (if any), keeps only its first line, and returns True when that value,
    stripped of whitespace, is exactly "signedDelegation" or "yes".
    Returns False when there is no "DNSSEC:" marker at all.
    """
    # at most three pieces are needed: before, first value, the rest
    pieces = whois_str.split("DNSSEC:", 2)
    if len(pieces) < 2:
        return False

    value = pieces[1].split("\n", 1)[0].strip()
    return value in ("signedDelegation", "yes")
160+
161+
def doSourceIana(whois_str: str, verbose: bool = False) -> str:
    """Drop the leading part of a whois response that ends with "source: IANA".

    When the marker "source: IANA" occurs exactly once, everything up to and
    including the marker is removed and the remainder is returned (often this
    is just whitespace). In every other case the text is returned unchanged.

    Args:
        whois_str: the raw whois response text.
        verbose: when True, print a note to stderr that this handler ran.
    """
    # here we can handle the example.com and example.net permanent IANA domains

    if verbose:
        print("i have seen source: IANA", file=sys.stderr)

    marker = "source: IANA"
    if whois_str.count(marker) != 1:
        return whois_str

    _head, _sep, remainder = whois_str.partition(marker)
    return remainder
172+
173+
def doIfServerNameLookForDomainName(whois_str: str, verbose: bool = False) -> str:
    """Skip an old-style "Server Name:" preamble and start at "Domain Name:".

    If the text contains a "Server Name:" entry (matched case-insensitively),
    the text is cut so that it starts at the first "Domain Name:" marker.
    When there is no "Server Name:" entry, or no "Domain Name:" marker to cut
    at, the text is returned unchanged.

    Args:
        whois_str: the raw whois response text.
        verbose: when True, print a note to stderr when "Server Name:" is seen.

    Returns:
        The (possibly shortened) whois text.
    """
    # not often available anymore
    if not re.search(r"Server Name:\s?(.+)", whois_str, re.IGNORECASE):
        return whois_str

    if verbose:
        msg = "i have seen Server Name:, looking for Domain Name:"
        print(msg, file=sys.stderr)

    start = whois_str.find("Domain Name:")
    if start < 0:
        # str.find() returns -1 when the marker is missing; slicing with -1
        # would keep only the last character, so leave the text untouched
        return whois_str

    return whois_str[start:]
181+
98182
def do_parse(
99183
whois_str: str,
100184
tld: str,
@@ -111,84 +195,24 @@ def do_parse(
111195
)
112196

113197
if whois_str.count("\n") < 5:
114-
if verbose:
115-
d = ".".join(dl)
116-
print(f"line count < 5:: {tld} {d} {whois_str}", file=sys.stderr)
117-
118-
s = whois_str.strip().lower()
119-
120-
# NOTE: from here s is lowercase only
121-
# ---------------------------------
122-
noneStrings = [
123-
"not found",
124-
"no entries found",
125-
"status: free",
126-
"no such domain",
127-
"the queried object does not exist",
128-
"domain you requested is not known",
129-
"status: available",
130-
]
131-
132-
for i in noneStrings:
133-
if i in s:
134-
return None
135-
136-
# ---------------------------------
137-
# is there any error string in the result
138-
if s.count("error"):
139-
return None
198+
return handleShortResponse(tld, dl, whois_str, verbose)
140199

141-
# ---------------------------------
142-
quotaStrings = [
143-
"limit exceeded",
144-
"quota exceeded",
145-
"try again later",
146-
"please try again",
147-
"exceeded the maximum allowable number",
148-
"can temporarily not be answered",
149-
"please try again.",
150-
"queried interval is too short",
151-
]
152-
153-
for i in quotaStrings:
154-
if i in s:
155-
raise WhoisQuotaExceeded(whois_str)
156-
157-
# ---------------------------------
158-
# ToDo: Name or service not known
159-
160-
# ---------------------------------
161-
raise FailedParsingWhoisOutput(whois_str)
162-
163-
# check the status of DNSSEC
164-
r["DNSSEC"] = False
165-
whois_dnssec: Any = whois_str.split("DNSSEC:")
166-
if len(whois_dnssec) >= 2:
167-
whois_dnssec = whois_dnssec[1].split("\n")[0]
168-
if whois_dnssec.strip() == "signedDelegation" or whois_dnssec.strip() == "yes":
169-
r["DNSSEC"] = True
200+
r["DNSSEC"] = doDnsSec(whois_str) # check the status of DNSSEC
170201

171-
# this is mostly not available in many tld's anymore, should be investigated
172-
# split whois_str to remove first IANA part showing info for TLD only
173-
whois_splitted = whois_str.split("source: IANA")
174-
if len(whois_splitted) == 2:
175-
whois_str = whois_splitted[1]
202+
if "source: IANA" in whois_str: # prepare for handling historical IANA domains
203+
whois_str = doSourceIana(whois_str, verbose)
176204

177-
# also not available for many modern tld's
178-
sn = re.findall(r"Server Name:\s?(.+)", whois_str, re.IGNORECASE)
179-
if sn:
180-
whois_str = whois_str[whois_str.find("Domain Name:") :]
205+
if "Server Name" in whois_str: # handle old type Server Name (not very common anymore)
206+
whois_str = doIfServerNameLookForDomainName(whois_str, verbose)
181207

182-
# return TLD_RE["com"] as default if tld not exists in TLD_RE
183-
for k, v in TLD_RE.get(tld, TLD_RE["com"]).items():
184-
if k.startswith("_"):
185-
# skip meta element like: _server or _privateRegistry
208+
for k, v in TLD_RE.get(tld, TLD_RE["com"]).items(): # use TLD_RE["com"] as default if a regex is missing
209+
if k.startswith("_"): # skip meta element like: _server or _privateRegistry
186210
continue
187211

212+
# Historical: here we use 'empty string' as default, not None
188213
if v is None:
189214
r[k] = [""]
190215
else:
191216
r[k] = v.findall(whois_str) or [""]
192-
# print("DEBUG: Keyval = " + str(r[k]))
193217

194218
return r

whois/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
Map2Underscore = {
4949
".ac.uk": "ac_uk",
5050
".co.uk": "co_uk",
51-
5251
".co.il": "co_il",
5352
# uganda
5453
".ca.ug": "ca_ug",

whois/tld_regexpr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@
115115
au = {
116116
"extend": "com",
117117
"registrar": r"Registrar Name:\s?(.+)",
118-
"updated_date": r"Last Modified:([^\n]*)", # fix empty LastModified
118+
"updated_date": r"Last Modified:([^\n]*)", # fix empty LastModified
119119
}
120120

121121
ax = {

0 commit comments

Comments
 (0)