Skip to content
This repository was archived by the owner on Feb 3, 2024. It is now read-only.

Commit c3666f0

Browse files
committed
add doIfServerNameLookForDomainName; move some comments; show where we use 'empty string' as default return
1 parent 1ee9567 commit c3666f0

File tree

1 file changed

+30
-16
lines changed

1 file changed

+30
-16
lines changed

whois/_2_parse.py

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def handleShortResponse(
100100
dl: List,
101101
whois_str: str,
102102
verbose: bool = False,
103-
):
103+
): # returns None or raises one of (WhoisQuotaExceeded, FailedParsingWhoisOutput)
104104
if verbose:
105105
d = ".".join(dl)
106106
print(f"line count < 5:: {tld} {d} {whois_str}", file=sys.stderr)
@@ -150,14 +150,35 @@ def handleShortResponse(
150150
# ---------------------------------
151151
raise FailedParsingWhoisOutput(whois_str)
152152

153-
def doDnsSec(whois_str: str):
153+
def doDnsSec(whois_str: str) -> bool:
154154
whois_dnssec: Any = whois_str.split("DNSSEC:")
155155
if len(whois_dnssec) >= 2:
156156
whois_dnssec = whois_dnssec[1].split("\n")[0]
157157
if whois_dnssec.strip() == "signedDelegation" or whois_dnssec.strip() == "yes":
158158
return True
159159
return False
160160

161+
def doSourceIana(whois_str: str, verbose: bool = False) -> str:
162+
# here we can handle the example.com and example.net permanent IANA domains
163+
164+
if verbose:
165+
msg = f"i have seen source: IANA"
166+
print(msg, file=sys.stderr)
167+
168+
whois_splitted = whois_str.split("source: IANA")
169+
if len(whois_splitted) == 2:
170+
whois_str = whois_splitted[1] # often this is actually just whitespace
171+
return whois_str
172+
173+
def doIfServerNameLookForDomainName(whois_str: str, verbose: bool = False) -> str:
174+
# not often available anymore
175+
if re.findall(r"Server Name:\s?(.+)", whois_str, re.IGNORECASE):
176+
if verbose:
177+
msg = f"i have seen Server Name:, looking for Domain Name:"
178+
print(msg, file=sys.stderr)
179+
whois_str = whois_str[whois_str.find("Domain Name:") :]
180+
return whois_str
181+
161182
def do_parse(
162183
whois_str: str,
163184
tld: str,
@@ -178,27 +199,20 @@ def do_parse(
178199

179200
r["DNSSEC"] = doDnsSec(whois_str) # check the status of DNSSEC
180201

181-
# this is mostly not available in many tld's anymore, should be investigated
182-
# split whois_str to remove first IANA part showing info for TLD only
183-
whois_splitted = whois_str.split("source: IANA")
184-
if len(whois_splitted) == 2:
185-
whois_str = whois_splitted[1]
202+
if "source: IANA" in whois_str: # prepare for handling historical IANA domains
203+
whois_str = doSourceIana(whois_str, verbose)
186204

187-
# also not available for many modern tld's
188-
sn = re.findall(r"Server Name:\s?(.+)", whois_str, re.IGNORECASE)
189-
if sn:
190-
whois_str = whois_str[whois_str.find("Domain Name:") :]
205+
if "Server Name" in whois_str: # handle old type Server Name (not very common anymore)
206+
whois_str = doIfServerNameLookForDomainName(whois_str, verbose)
191207

192-
# return TLD_RE["com"] as default if tld not exists in TLD_RE
193-
for k, v in TLD_RE.get(tld, TLD_RE["com"]).items():
194-
if k.startswith("_"):
195-
# skip meta element like: _server or _privateRegistry
208+
for k, v in TLD_RE.get(tld, TLD_RE["com"]).items(): # fall back to TLD_RE["com"] when tld has no entry in TLD_RE
209+
if k.startswith("_"): # skip meta element like: _server or _privateRegistry
196210
continue
197211

212+
# Historical: the default here is a list holding one empty string ([""]), not None
198213
if v is None:
199214
r[k] = [""]
200215
else:
201216
r[k] = v.findall(whois_str) or [""]
202-
# print("DEBUG: Keyval = " + str(r[k]))
203217

204218
return r

0 commit comments

Comments
 (0)