Skip to content
This repository was archived by the owner on Feb 3, 2024. It is now read-only.

Commit c9a3707

Browse files
committed
add return_raw_text_for_unsupported_tld to return only the raw text for unsupported domains
1 parent baade89 commit c9a3707

File tree

6 files changed

+184
-6
lines changed

6 files changed

+184
-6
lines changed

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ Raise an issue https://github.com/DannyCork/python-whois/issues/new
6969
* add testing against static known data in dir: ./testdata/<domain>/output
7070
* test.sh will test all domains in testdata without actually calling whois, the input data is instead read from testdata/<domain>/input
7171

72+
2022-11-11: maarten_boot
73+
* add support for returning the raw data from the whois command: flag include_raw_whois_text
74+
* add support for handling unsupported domains via whois raw text only: flag return_raw_text_for_unsupported_tld
75+
7276
## Support
7377
* Python 3.x is supported.
7478
* Python 2.x IS NOT supported.

makeTestdataAll.sh

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#! /bin/bash
2+
3+
TMPDIR="./tmp"
4+
FORCE=0
5+
VERBOSE=1
6+
7+
# Make sure the scratch directory named by $TMPDIR exists.
# Globals: TMPDIR (read)
# Exits:   101 when the directory cannot be created
prepTempDir()
{
    if ! mkdir -p "$TMPDIR"; then
        echo "FATAL: cannot make test dir: $TMPDIR" >&2
        exit 101
    fi
}
14+
15+
# Write the output of "./test2.py -S" (its supported-TLD listing) to stdout.
# The script is looked up relative to the current working directory.
getAllTldSupported()
{
    local lister="./test2.py"

    "$lister" -S
}
19+
20+
# Collect reference data for one candidate domain "<domain>.<tld>" in $TMPDIR/<tld>.
# Globals:   TMPDIR (read)
#            FORCE  (read; "1" regenerates input/output even when they exist)
# Arguments: $1 - the tld
#            $2 - the label to put in front of the tld
# Files written in $TMPDIR/<tld>:
#            __dns-soa, __dns-ns        raw output of the host lookups
#            __domain__<domain>.<tld>   empty marker naming the domain used
#            input                      raw whois text
#            output                     stdout of ./test2.py -d
# Returns:   0 on success, 1 when the domain has no SOA record or whois fails
# Exits:     101 when the tld directory cannot be created
makeTestDataOriginalOne()
{
    local tld="$1"
    local domain="$2"

    local d="$TMPDIR/$tld"
    mkdir -p "$d" || {
        echo "FATAL: cannot make tld directory: '$d'" >&2
        exit 101
    }

    local zz="$domain.$tld"

    host -t soa "$zz" |
    tee "$d/__dns-soa" |
    grep " has SOA record " || {
        # no soa record so that domain does not exist, cleanup the test dir
        # (the ns data lives in __dns-ns, so that is the name to remove)
        rm -f "$d/input" "$d/output" "$d/__domain__$zz" "$d/__dns-soa" "$d/__dns-ns"
        echo "INFO: no domain for $zz" >&2
        return 1
    }

    host -t ns "$zz" > "$d/__dns-ns"

    # what domain did we test
    touch "$d/__domain__$zz"

    # force english, force no cache
    # query the domain that passed the SOA check, not a fixed "meta.<tld>"
    if [[ ! -s "$d/input" || "$FORCE" = "1" ]]; then
        LANG=EN whois --force-lookup "$zz" >"$d/input" || {
            # whois has a problem
            rm -f "$d/input" "$d/output" "$d/__domain__$zz" "$d/__dns-soa" "$d/__dns-ns"
            return 1
        }
    fi

    if [[ ! -s "$d/output" || "$FORCE" = "1" ]]; then
        ./test2.py -d "$zz" >"$d/output"
    fi

    return 0
}
62+
63+
# Print the candidate labels to try in front of each tld, one per line.
# Blank lines and lines starting with ';' or '#' in the embedded list are
# skipped; only the first word of every remaining line is printed.
# The heredoc delimiter is quoted so the list is never subject to shell
# expansion, and awk reads it directly instead of through cat.
domainsToTry()
{
    awk '
    /^[ \t]*$/ { next }
    /^[ \t]*;/ { next }
    /^[ \t]*#/ { next }
    { print $1 }
    ' <<'!'
meta
google
!
}
76+
77+
# For every tld printed by getAllTldSupported, try the labels printed by
# domainsToTry in order and stop at the first one that leaves an "input" file
# in $TMPDIR/<tld>.
# Globals: TMPDIR  (read)
#          VERBOSE (read; "1" lists the tld directory after a success)
# Outputs: "try: <tld>" per tld on stdout, plus the optional directory listing
makeTestDataOriginalAll()
{
    local tld domain

    getAllTldSupported |
    while read -r tld
    do
        echo "try: $tld"

        domainsToTry |
        while read -r domain
        do
            # stdin of this loop is the list of labels; keep the commands run
            # by the worker from consuming it
            makeTestDataOriginalOne "$tld" "$domain" </dev/null
            if [[ -f "$TMPDIR/$tld/input" ]]; then
                # compare against "1" so that VERBOSE=0 really is quiet
                if [[ "$VERBOSE" = "1" ]]; then
                    ls -l "$TMPDIR/$tld/"
                fi
                break
            fi
        done
    done
}
97+
98+
# Entry point of the generator script.
main()
{
    # the scratch directory must exist before any tld is processed
    prepTempDir

    # walk all supported tlds and collect their reference data
    makeTestDataOriginalAll
}
103+
104+
main

test.sh

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
#! /usr/bin/bash
22

33
# signal the whois module that we are testing; it then reads its data from testdata/<domain>/input
4-
TestDataDir=$(realpath ./testdata)
5-
export TEST_WHOIS_PYTHON="$TestDataDir"
4+
# Resolve a test data directory and publish it to the code under test.
# Arguments: $1 - path of the test data directory
# Globals:   TestDataDir       (written) resolved path
#            TEST_WHOIS_PYTHON (written, exported) same value
# Returns:   0 on success; 1 when realpath cannot resolve the path, in which
#            case both globals keep their previous values instead of being
#            replaced by an empty string
prepPath()
{
    local xpath="$1"
    local resolved

    resolved=$( realpath "$xpath" ) || {
        echo "FATAL: cannot resolve test data path: '$xpath'" >&2
        return 1
    }

    TestDataDir="$resolved"
    export TEST_WHOIS_PYTHON="$TestDataDir"
}
610

711
get_testdomains()
812
{
@@ -23,11 +27,16 @@ testOneDomain()
2327

2428
# Run testOneDomain for every entry printed by get_testdomains.
# Arguments: $1 - optional; when it names an existing directory it replaces
#                 the default "testdata" directory
main()
{
    prepPath "testdata" # set a default
    if [[ -d "$1" ]]; then # if an argument is given and it is a directory, use that for testing
        prepPath "$1"
    fi

    get_testdomains |
    while read -r line
    do
        # quote both expansions so a path is never split into several words
        testOneDomain "$(basename -- "$line")"
    done
}
3241

33-
main
42+
# pass the command line arguments on unchanged; "$@" keeps each one a single word
main "$@"

test2.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def main(argv):
260260
fileData = {}
261261

262262
for opt, arg in opts:
263-
if opt in ( "-S", "SupportedTld"):
263+
if opt in ("-S", "SupportedTld"):
264264
for tld in sorted(whois.validTlds()):
265265
print(tld)
266266
sys.exit(0)

whois/_3_adjust.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,16 @@ def __init__(
3636
whois_str: Optional[str] = None,
3737
verbose: bool = False,
3838
include_raw_whois_text: bool = False,
39+
return_raw_text_for_unsupported_tld: bool = False,
3940
):
4041
if include_raw_whois_text and whois_str is not None:
4142
self.text = whois_str
4243

44+
if return_raw_text_for_unsupported_tld is True:
45+
self.tld = data["tld"]
46+
self.name = data["domain_name"][0].strip().lower()
47+
return
48+
4349
self.name = data["domain_name"][0].strip().lower()
4450
self.tld = data["tld"]
4551

whois/__init__.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,8 +208,10 @@ def fromDomainStringToTld(domain: str, internationalized: bool, verbose: bool =
208208
return tld, d
209209

210210

211-
def validateWeKnowTheToplevelDomain(tld): # may raise UnknownTld
211+
def validateWeKnowTheToplevelDomain(tld, return_raw_text_for_unsupported_tld: bool = False): # may raise UnknownTld
212212
if tld not in TLD_RE.keys():
213+
if return_raw_text_for_unsupported_tld:
214+
return None
213215
a = f"The TLD {tld} is currently not supported by this package."
214216
b = "Use validTlds() to see what toplevel domains are supported."
215217
msg = f"{a} {b}"
@@ -244,6 +246,45 @@ def doSlowdownHintForThisTld(tld: str, thisTld, slow_down: int, verbose: bool =
244246
return slow_down
245247

246248

249+
def doUnsupportedTldAnyway(
    tld: str,
    dl: Dict,
    ignore_returncode: bool = False,
    slow_down: int = 0,
    server: Optional[str] = None,
    verbose: bool = False,
):
    """
    Run the whois query for a tld this package has no parser for.

    The query arguments are passed straight to do_query(); the result is
    wrapped in a Domain built from minimal data only: the tld and the
    domain name obtained by joining the items of dl with ".".
    The Domain is always created with include_raw_whois_text and
    return_raw_text_for_unsupported_tld enabled.

    verbose: if true, print the minimal data to standard error
    """
    # we will not hunt for possible valid first level domains as we have no actual feedback
    raw_text = do_query(
        dl=dl,
        slow_down=slow_down,
        ignore_returncode=ignore_returncode,
        server=server,
        verbose=verbose,
    )

    # we will only return minimal data
    # note the fields are default all array, except tld
    minimal = {
        "tld": tld,
        "domain_name": [".".join(dl)],
    }

    if verbose:
        print(minimal, file=sys.stderr)

    return Domain(
        data=minimal,
        whois_str=raw_text,
        verbose=verbose,
        include_raw_whois_text=True,
        return_raw_text_for_unsupported_tld=True,
    )
286+
287+
247288
def query(
248289
domain: str,
249290
force: bool = False,
@@ -255,6 +296,7 @@ def query(
255296
with_cleanup_results=False,
256297
internationalized: bool = False,
257298
include_raw_whois_text: bool = False,
299+
return_raw_text_for_unsupported_tld: bool = False,
258300
) -> Optional[Domain]:
259301
"""
260302
force=True Don't use cache.
@@ -270,15 +312,28 @@ def query(
270312
verbose: if true, print relevant information on steps taken to standard error
271313
include_raw_whois_text:
272314
if requested the full response is also returned.
315+
return_raw_text_for_unsupported_tld:
316+
if the tld is unsupported, just try it anyway but return only the raw text.
273317
"""
274318

275319
assert isinstance(domain, str), Exception("`domain` - must be <str>")
320+
return_raw_text_for_unsupported_tld = bool(return_raw_text_for_unsupported_tld)
276321

277322
tld, dl = fromDomainStringToTld(domain, internationalized, verbose)
278323
if tld is None:
279324
return None
280325

281-
thisTld = validateWeKnowTheToplevelDomain(tld) # may raise UnknownTld
326+
thisTld = validateWeKnowTheToplevelDomain(tld, return_raw_text_for_unsupported_tld) # may raise UnknownTld
327+
if thisTld is None:
328+
return doUnsupportedTldAnyway(
329+
tld,
330+
dl,
331+
ignore_returncode=ignore_returncode,
332+
slow_down=slow_down,
333+
server=server,
334+
verbose=verbose,
335+
)
336+
282337
verifyPrivateREgistry(thisTld) # may raise WhoisPrivateRegistry
283338
server = doServerHintsForThisTld(tld, thisTld, server, verbose)
284339

0 commit comments

Comments
 (0)