Skip to content
This repository was archived by the owner on Feb 3, 2024. It is now read-only.

Commit 7ac6217

Browse files
author
MooCow
authored
Merge pull request #241 from maarten-boot/development
add several tld,s; add testprogram maketestdatAll for fast investigation of new tld's
2 parents 04413f8 + 43f5b35 commit 7ac6217

File tree

34 files changed

+903
-75
lines changed

34 files changed

+903
-75
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,4 +70,5 @@ reformat-code.sh
7070
t1.py
7171
typescript
7272
test.out
73+
diff.out
7374
tmp/

DONE

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
DONE
2+
2022-11-17:mboot;
3+
4+
- tld ac; add registrant_country
5+
6+
- email regex use \. in: "emails": r"[\w\.-]+@[\w\.-]+\.[\w]{2,4}"
7+
this now results in valid data on google.com and meta.com and all derived from .com tld
8+
9+
- add comment in tld_regexpr.py that emails, status, name_servers are multi items (lists)
10+
and all the rest are single results.
11+
12+
- add comments to beginning of tld_regexpr.py explaining that all matches are actually case insensitive (findall)
13+
and that many whois responses have trailing whitespace and may end in \r\n
14+
this helps with constructing regexes for future use
15+
16+
- add skipFromHere in _2_parse.py: lines starting with ^>>> signify the end of a normal whois response
17+
after this line there is only human or legal information so we can simply skip that text
18+
(a similar construct with ^--\s will be done later)
19+
(( retracted we have domains where this is very early: switched off for now, make this configurable ))
20+
21+
- add comment that unfortunately we cannot currently use rtrim on input from whois response as many regexes expect
22+
either \r or trailing whitespace, this can be done later and would make many regexes simpler in end detection
23+
24+
- simplify com.tr regex from multiple . to \.+, and add note on the trailing dot behind the date
25+
add meta.com.tr to regular testdata
26+
27+
- fix test2.py postamble cleanup for e.g. com.sg, which has lines at the top with ------------
28+
- add getopts to makeTestdataAll.sh
29+
- allow makeTestdataAll.sh -v -f -d com.sg to show the reduced input.out and the produced output of test2.py
30+
- add '[WHITESPACE AT END]' in input.out during input reduction test
31+
32+
- add tld do, com.do, mo, com.mo, cx, dz (never answers), gd, mn, gay, tl, tt (responds with perl script)
33+
34+
- add -t <tld> and -d <domain> to makeTestdataAll.sh to support a custom domain instead of the default meta and google
35+
36+
- add tld st, so, nrw, lat, so , realestate, ph, com.ph, org.ph, net.ph, zm
37+
38+
- fix tokyo, it needs an explicit server; add sy: it has a whois server but I cannot get it to answer, set to privateReg
39+
- add net.tr and tr (privateReg), onl, blue, garden, promo, one,
40+
41+
- add nic to default test group for makeTestdataAll.sh
42+

bin/find_input_no_output.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#! /bin/bash
2+
3+
# Report every test directory under tmp/ that has input but no output.
#
# For each entry <item> in tmp/ (relative to the current directory) where
# tmp/<item>/input exists and is non-empty but tmp/<item>/output is not a
# regular file, print one line: "# NO_OUTPUT for <item>".
#
# Arguments: none
# Outputs:   one report line per incomplete entry on stdout
# Returns:   0 always (a missing or empty tmp/ simply reports nothing)
get_status_output()
{
    local d item

    # Glob instead of parsing `ls | read`: names containing blanks or
    # backslashes survive intact and a missing tmp/ produces no error output.
    for d in tmp/*
    do
        [ -e "$d" ] || continue     # an unmatched glob stays literal; skip it
        item="${d##*/}"

        if [ -s "$d/input" ] && [ ! -f "$d/output" ]
        then
            echo "# NO_OUTPUT for $item"
        fi
    done

    return 0
}
16+
17+
# Script entry point: emit the NO_OUTPUT report produced by get_status_output.
main() {
    get_status_output
}
21+
22+
main

makeTestdataAll.sh

Lines changed: 97 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22

33
TMPDIR="./tmp" # the default work directory is a local tmp (exclude by .gitignore )
44

5-
FORCE=0 # force whois loogup by not using cached data
6-
VERBOSE=0 # along the way inform us on progress and results
5+
FORCE=0 # force whois loogup by not using cached data
6+
VERBOSE=0 # along the way inform us on progress and results
7+
ALL=0 # process all supported tld's
78

89
prepTempDir()
910
{
10-
# make the work dir if it does nt exist
11+
# make the work dir if it does not exist
1112
mkdir -p "$TMPDIR" || {
1213
echo "FATAL: cannot make test dir: $TMPDIR" >&2
1314
exit 101
@@ -27,23 +28,27 @@ testNameserverExistsInInputAndOutput()
2728
local dns="$d/__dns-ns"
2829

2930
[ -s "$dns" ] || return # dont bother at all if we have no dns file
31+
[ -s "$d/input" ] || return # dont bother if we have no input file
3032

3133
rm -f "$d/error.ns"
3234

3335
cat "$dns" |
3436
awk '{ print $NF }' |
3537
while read ns
3638
do
37-
[ -s "$d/input" ] || return # dont bother if we have no input file
38-
39+
[ "$VERBOSE" = "1" ] && echo "test $ns in input"
3940
grep -q -i "$ns" "$d/input" && {
4041
# only test in the output if it is present in the input
42+
43+
[ "$VERBOSE" = "1" ] && echo "test $ns in output"
4144
grep -q -i "$ns" "$d/output" || {
4245
echo "ERROR: output; missing nameserver '$ns' for tld: $tld" |
4346
tee -a "$d/error.ns"
47+
return 1
4448
}
4549
}
4650
done
51+
return 0
4752
}
4853

4954
cleanupTldTestDirectory()
@@ -128,7 +133,6 @@ getDnsSoaRecordAndLeaveEvidenceTldDomain()
128133
makeDirectoryForTld()
129134
{
130135
local tld="$1"
131-
local domain="$2"
132136
local d="$TMPDIR/$tld"
133137

134138
mkdir -p "$d" || {
@@ -153,48 +157,63 @@ makeTestDataOriginalOneTldDomain()
153157

154158
getDnsSoaRecordAndLeaveEvidenceTldDomain "$tld" "$domain" || return 1
155159

156-
# what domain did we test
157-
touch "$d/__domain__$zz"
158-
159-
# store the nameservers from dns
160-
host -t ns "$zz" > "$d/__dns-ns__$zz"
161-
160+
touch "$d/__domain__$zz" # what domain did we test
161+
host -t ns "$zz" > "$d/__dns-ns__$zz" # store the nameservers from dns
162162
getTestDataInputForTldAndDomain "$tld" "$domain" || return 1
163-
164-
getTestDataOutputForTldAndDomain "$tld" "$domain"
163+
getTestDataOutputForTldAndDomain "$tld" "$domain" || return 1
165164
return 0
166165
}
167166

168167
# Print the names to combine with a tld, one per line, in the order they
# should be tried.
#
# Arguments: $1 - a single name to try, or "__DEFAULT__" to get the built-in
#                 candidate list; a missing or empty $1 is treated as
#                 "__DEFAULT__" so a caller that passes no name never
#                 produces a blank candidate
# Outputs:   the candidate names on stdout
domainsToTry()
{
    local domain="${1:-__DEFAULT__}"

    if [ "$domain" != "__DEFAULT__" ]
    then
        # printf, not echo: a value such as "-n" must be printed verbatim
        printf '%s\n' "$domain"
        return
    fi

    # The awk filter drops blank lines and lines starting with ';' or '#',
    # so entries in the list below can be commented out in place.
    awk '
        /^[ \t]*$/ { next }
        /^[ \t]*;/ { next }
        /^[ \t]*#/ { next }
        { print $1 }
    ' <<'EOF'
nic
meta
google
EOF
}
181188

182189
# Try candidate domains for a tld until one yields test data, then check the
# nameservers of the result.
#
# Arguments: $1 - the tld
#            $2 - name to try, or "__DEFAULT__"; handed to domainsToTry
# Globals:   TMPDIR, VERBOSE (read)
# Returns:   the status of testNameserverExistsInInputAndOutput
makeTestDataTldFromDomains()
{
    local tld="$1"
    local domain="$2"
    local dir="$TMPDIR/$tld"
    local candidate f

    domainsToTry "$domain" |
    while read -r candidate
    do
        [ "$VERBOSE" = "1" ] && echo "try: $candidate.$tld"

        # stop at the first candidate for which data could be collected
        if makeTestDataOriginalOneTldDomain "$tld" "$candidate"
        then
            if [ "$VERBOSE" = "1" ] && [ -s "$dir/input" ]
            then
                ls -l "$dir/"
            fi
            break
        fi
    done

    if [ "$VERBOSE" = "1" ]
    then
        # show the reduced input and the output; either file may be absent
        # when no candidate produced data, so only print what exists
        for f in "$dir/input.out" "$dir/output"
        do
            [ -f "$f" ] && cat "$f"
        done
    fi

    testNameserverExistsInInputAndOutput "$tld"
}
199218

200219
makeRulesFromTldIfExist()
@@ -209,12 +228,13 @@ makeRulesFromTldIfExist()
209228
# Produce the test data for one tld.
#
# Arguments: $1 - the tld to process (required)
#            $2 - name to combine with the tld; defaults to "__DEFAULT__"
#                 so callers that only pass a tld keep working
# Globals:   VERBOSE (read)
# Returns:   1 when no tld is given, otherwise the status of
#            makeTestDataTldFromDomains; exits 101 when makeDirectoryForTld
#            fails
makeTestDataOriginalOneTld()
{
    local tld="$1"
    local domain="${2:-__DEFAULT__}"

    # an empty tld would make the work directory collapse onto $TMPDIR itself
    [ -n "$tld" ] || {
        echo "ERROR: no tld specified" >&2
        return 1
    }

    [ "$VERBOSE" = "1" ] && echo "try: $tld"

    makeDirectoryForTld "$tld" || exit 101
    makeRulesFromTldIfExist "$tld"
    makeTestDataTldFromDomains "$tld" "$domain"
}
219239

220240
makeTestDataOriginalAllTldSupported()
@@ -227,19 +247,62 @@ makeTestDataOriginalAllTldSupported()
227247
done
228248
}
229249

250+
# Print the command line help and terminate the script.
#
# Arguments: $1 - exit status to terminate with (optional, default 0)
# Outputs:   help text on stdout
usage()
{
    cat <<EOF
$0 usage:
  -h          show the help text
  -v          switch on verbose (will show progress)
  -f          switch on force (will re analyze all)
  -t <tld>    specify a tld to analyze
  -d <domain> combine this domain with the tld instead of the default test domains
  -a          analyze all tld currently supported
EOF
    exit "${1:-0}"
}
262+
230263
# Parse the command line and run the test data generation.
#
# Options:   -h help, -v verbose, -f force, -a all supported tld's,
#            -t <tld> process one tld, -d <domain> name to combine with it
# Globals:   VERBOSE, FORCE, ALL (written)
# Exits:     via usage when called without arguments or with -h;
#            101 on an invalid option or when neither -t nor -a is given
main()
{
    # keep the parser state and the selected tld local so nothing inherited
    # from the environment can silently select a tld
    local opt tld="" domain="__DEFAULT__"
    local OPTIND=1 OPTARG

    [ $# -eq 0 ] && usage

    VERBOSE=0
    FORCE=0
    ALL=0

    while getopts "havft:d:" opt
    do
        case "$opt" in
        v) VERBOSE=1 ;;
        f) FORCE=1 ;;
        a) ALL=1 ;;
        t) tld="$OPTARG" ;;
        d) domain="$OPTARG" ;;   # replaces the default list of test domains
        h) usage ;;
        *)
            # usage terminates its caller, so run it in a subshell and then
            # leave with a failure status of our own
            ( usage ) >&2
            exit 101
            ;;
        esac
    done

    if [ "$ALL" != "1" ] && [ -z "$tld" ]
    then
        echo "FATAL: specify a tld with -t <tld> or use -a" >&2
        ( usage ) >&2
        exit 101
    fi

    prepTempDir

    if [ "$ALL" = "1" ]
    then
        makeTestDataOriginalAllTldSupported
        return
    fi

    makeTestDataOriginalOneTld "$tld" "$domain"
}
244307

245308
main $* 2>&1 |

test.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@ testOneDomain()
2222
echo "testing: $domain"
2323
./test2.py -d "$domain" >"$TestDataDir/$domain/test.out"
2424

25-
diff "$TestDataDir/$domain/output" "$TestDataDir/$domain/test.out" | tee "$TestDataDir/$domain/out"
25+
diff "$TestDataDir/$domain/output" "$TestDataDir/$domain/test.out" |
26+
tee "$TestDataDir/$domain/diff.out"
2627
}
2728

2829
main()

test2.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def cleanupWhoisResponse(
120120
percentSeen = True
121121

122122
if postambleSeen is False:
123-
if line.startswith("--") or line.startswith(">>> ") or line.startswith("Copyright notice"):
123+
if line.startswith("-- ") or line.startswith(">>> ") or line.startswith("Copyright notice"):
124124
postambleSeen = True
125125

126126
if postambleSeen is True:
@@ -155,9 +155,10 @@ def printMe(self):
155155
if len(self.rDict[k]):
156156
n = 0
157157
for lines in self.rDict[k]:
158-
tab = " [TAB] " if "\t" in lines else "-------" # tabs are present in this section
159-
cr = " [CR] " if "\r" in lines else "------" # \r is present in this section
160-
print(f"# ------------- {k} Section: {n} {cr}{tab}---------")
158+
ws = " [WHITESPACE AT END] " if re.search(r"[ \t]+\r?\n", lines) else ""
159+
tab = " [TAB] " if "\t" in lines else "" # tabs are present in this section
160+
cr = " [CR] " if "\r" in lines else "" # \r is present in this section
161+
print(f"# --- {k} Section: {n} {cr}{tab}{ws}")
161162
n += 1
162163
print(lines)
163164

testdata/example.com/output

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ statuses list ['']
1212
dnssec bool False
1313
name_servers list []
1414
registrant str ''
15+
emails list ['']

testdata/example.net/output

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ statuses list ['']
1212
dnssec bool False
1313
name_servers list []
1414
registrant str ''
15+
emails list ['']

testdata/example.org/input

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ Name Server: a.iana-servers.net
5858
Name Server: b.iana-servers.net
5959
DNSSEC: signedDelegation
6060
URL of the ICANN Whois Inaccuracy Complaint Form: https://www.icann.org/wicf/
61-
>>> Last update of WHOIS database: 2022-11-07T21:05:05Z <<<
61+
>>> Last update of WHOIS database: 2022-11-17T10:47:37Z <<<
6262

6363
For more information on Whois status codes, please visit https://icann.org/epp
6464

65-
Terms of Use: Access to Public Interest Registry WHOIS information is provided to assist persons in determining the contents of a domain name registration record in the Public Interest Registry registry database. The data in this record is provided by Public Interest Registry for informational purposes only, and Public Interest Registry does not guarantee its accuracy. This service is intended only for query-based access. You agree that you will use this data only for lawful purposes and that, under no circumstances will you use this data to (a) allow, enable, or otherwise support the transmission by e-mail, telephone, or facsimile of mass unsolicited, commercial advertising or solicitations to entities other than the data recipient's own existing customers; or (b) enable high volume, automated, electronic processes that send queries or data to the systems of Registry Operator, a Registrar, or Donuts except as reasonably necessary to register domain names or modify existing registrations. All rights reserved. Public Interest Registry reserves the right to modify these terms at any time. By submitting this query, you agree to abide by this policy. The Registrar of Record identified in this output may have an RDDS service that can be queried for additional information on how to contact the Registrant, Admin, or Tech contact of the queried domain name.
65+
Terms of Use: Access to Public Interest Registry WHOIS information is provided to assist persons in determining the contents of a domain name registration record in the Public Interest Registry registry database. The data in this record is provided by Public Interest Registry for informational purposes only, and Public Interest Registry does not guarantee its accuracy. This service is intended only for query-based access. You agree that you will use this data only for lawful purposes and that, under no circumstances will you use this data to (a) allow, enable, or otherwise support the transmission by e-mail, telephone, or facsimile of mass unsolicited, commercial advertising or solicitations to entities other than the data recipient's own existing customers; or (b) enable high volume, automated, electronic processes that send queries or data to the systems of Registry Operator, a Registrar, or Identity Digital except as reasonably necessary to register domain names or modify existing registrations. All rights reserved. Public Interest Registry reserves the right to modify these terms at any time. By submitting this query, you agree to abide by this policy. The Registrar of Record identified in this output may have an RDDS service that can be queried for additional information on how to contact the Registrant, Admin, or Tech contact of the queried domain name.

testdata/example.org/output

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ statuses list ['serverDeleteProhibited https://icann.org/
1212
dnssec bool True
1313
name_servers list ['a.iana-servers.net', 'b.iana-servers.net']
1414
registrant str 'ICANN'
15+
emails list ['']

0 commit comments

Comments
 (0)