Skip to content
This repository was archived by the owner on Feb 3, 2024. It is now read-only.

Commit 005f329

Browse files
committed
add nameservers for domains where they were missing or incomplete; makeTestdataAll.sh helps with building test data; test2.py extended to support input analysis; add optional emails in _3_adjust.py, used by some TLDs
1 parent 84fa8d9 commit 005f329

File tree

6 files changed

+100
-88
lines changed

6 files changed

+100
-88
lines changed

TODO

Lines changed: 3 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,5 @@
1-
ERROR: output; missing nameserver 'dns1.idp365.net.' for tld: ac
2-
ERROR: output; missing nameserver 'dns3.idp365.net.' for tld: ac
3-
ERROR: output; missing nameserver 'dns2.idp365.net.' for tld: ac
4-
ERROR: output; missing nameserver 'ns3202.ispapi.net.' for tld: as
5-
ERROR: output; missing nameserver 'ns1.domain.io.' for tld: aw
6-
ERROR: output; missing nameserver 'ns2.domain.io.' for tld: aw
7-
ERROR: output; missing nameserver 'ns1.101domain.com.' for tld: ax
8-
ERROR: output; missing nameserver 'ns2.101domain.com.' for tld: ax
9-
ERROR: output; missing nameserver 'ns2.vodien.com.' for tld: com.sg
10-
ERROR: output; missing nameserver 'ns3.vodien.com.' for tld: com.sg
11-
ERROR: output; missing nameserver 'm.coowo.com.' for tld: com.tw
12-
ERROR: output; missing nameserver 'n.coowo.com.' for tld: com.tw
13-
ERROR: output; missing nameserver 'ns-1435.awsdns-51.org.' for tld: hk
14-
ERROR: output; missing nameserver 'ns-2007.awsdns-58.co.uk.' for tld: hk
15-
ERROR: output; missing nameserver 'ns-446.awsdns-55.com.' for tld: hk
16-
ERROR: output; missing nameserver 'ns-711.awsdns-24.net.' for tld: hk
17-
ERROR: output; missing nameserver 'ns3202.ispapi.net.' for tld: je
18-
ERROR: output; missing nameserver 'ns1.101domain.com.' for tld: kg
19-
ERROR: output; missing nameserver 'ns2.101domain.com.' for tld: kg
20-
ERROR: output; missing nameserver 'ns01.domaincontrol.com.' for tld: me
21-
ERROR: output; missing nameserver 'ns02.domaincontrol.com.' for tld: me
22-
ERROR: output; missing nameserver 'ns4.european-server.com.' for tld: nl
23-
ERROR: output; missing nameserver 'ns3.european-server.com.' for tld: nl
24-
ERROR: output; missing nameserver 'ns3202.ispapi.net.' for tld: pl
1+
TODO
2+
3+
# pt is difficult: it often gives no data; it does work from AWS Frankfurt though
254
ERROR: output; missing nameserver 'ns1.dnscpanel.com.' for tld: pt
265
ERROR: output; missing nameserver 'ns2.dnscpanel.com.' for tld: pt
27-
ERROR: output; missing nameserver 'ns3.netim.net.' for tld: sg
28-
ERROR: output; missing nameserver 'ns1.netim.net.' for tld: sg
29-
ERROR: output; missing nameserver 'ns2.netim.net.' for tld: sg
30-
ERROR: output; missing nameserver 'ns4.lovellsnames.org.' for tld: tn
31-
ERROR: output; missing nameserver 'ns3.lovellsnames.org.' for tld: tn
32-
ERROR: output; missing nameserver 'admns1.hinet.net.' for tld: tw
33-
ERROR: output; missing nameserver 'admns2.hinet.net.' for tld: tw
34-
ERROR: output; missing nameserver 'ns3.lovellsnames.org.' for tld: uk
35-
ERROR: output; missing nameserver 'ns2.lovellsnames.org.' for tld: uk
36-
ERROR: output; missing nameserver 'ns4.lovellsnames.org.' for tld: uk

makeTestdataAll.sh

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,13 @@ getTestDataInputForTldAndDomain()
6666
# make the testing input data
6767
# don't overwrite the input file unless FORCE is requested
6868

69-
[ ! -s "$d/input" -o "$FORCE" = "1" ] && {
69+
[ "$FORCE" == "1" ] && {
70+
rm -f "$d/input"
71+
}
72+
73+
[ -s "$d/input" ] || {
7074
# for whois force english, force no cache
71-
# LANG=EN whois --force-lookup "meta.$tld" >"$d/input" || {
72-
whois --force-lookup "meta.$tld" >"$d/input" || {
75+
LANG=EN whois --force-lookup "meta.$tld" >"$d/input" || {
7376
# whois has a problem
7477
local ret=$?
7578
echo "ERROR: whois returns $ret for domain: $zz" >&2
@@ -91,7 +94,11 @@ getTestDataOutputForTldAndDomain()
9194

9295
# make the testing output data
9396
# don't overwrite the output file unless FORCE is requested
94-
[ ! -s "$d/output" -o "$FORCE" = "1" ] && {
97+
[ "$FORCE" == "1" ] && {
98+
rm -f "$d/output"
99+
}
100+
101+
[ -s "$d/output" ] || {
95102
./test2.py -d "$domain.$tld" >"$d/output"
96103
}
97104
}
@@ -108,7 +115,7 @@ getDnsSoaRecordAndLeaveEvidenceTldDomain()
108115
# get the soa record , if it exists proceed otherwise ignore this domain
109116
# along the way we store the raw soa record also
110117
host -t soa "$zz" |
111-
tee "$d/__dns-soa" |
118+
tee "$d/__dns-soa__$zz" |
112119
grep -q " has SOA record " || {
113120
# no soa record so that domain does not exist, cleanup the test dir
114121
cleanupTldTestDirectory "$tld" "$domain"
@@ -150,9 +157,9 @@ makeTestDataOriginalOneTldDomain()
150157
touch "$d/__domain__$zz"
151158

152159
# store the nameservers from dns
153-
host -t ns "$zz" > "$d/__dns-ns"
160+
host -t ns "$zz" > "$d/__dns-ns__$zz"
154161

155-
getTestDataInputForTldAndDomain || return 1
162+
getTestDataInputForTldAndDomain "$tld" "$domain" || return 1
156163

157164
getTestDataOutputForTldAndDomain "$tld" "$domain"
158165
return 0
@@ -180,7 +187,9 @@ makeTestDataTldFromDomains()
180187
while read domain
181188
do
182189
[ "$VERBOSE" = "1" ] && echo "try: $domain.$tld"
190+
183191
makeTestDataOriginalOneTldDomain "$tld" "$domain"
192+
184193
[ -s "$TMPDIR/$tld/input" ] && {
185194
[ "$VERBOSE" = "1" ] && ls -l "$TMPDIR/$tld/"
186195
testNameserverExistsInInputAndOutput "$tld" && break

reformat-code.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ doIt()
44
{
55
black --line-length 120 .
66

7-
pylama . |
7+
pylama *.py whois/ |
88
awk '
99
/__init__/ && / W0611/ { next }
1010
# / W0401 / { next }

test2.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,17 +147,16 @@ def printMe(self):
147147
for k in zz:
148148
n = 0
149149
for lines in self.rDict[k]:
150-
tab = " [TAB] " if "\t" in lines else "" # tabs are present in this section
151-
cr = " [CR] " if "\r" in lines else "" # \r is present in this section
152-
print(k,cr, tab, lines)
153-
150+
tab = " [TAB] " if "\t" in lines else "" # tabs are present in this section
151+
cr = " [CR] " if "\r" in lines else "" # \r is present in this section
152+
print(k, cr, tab, lines)
154153

155154
k = "Body"
156155
if len(self.rDict[k]):
157156
n = 0
158157
for lines in self.rDict[k]:
159-
tab = " [TAB] " if "\t" in lines else "-------" # tabs are present in this section
160-
cr = " [CR] " if "\r" in lines else "------" # \r is present in this section
158+
tab = " [TAB] " if "\t" in lines else "-------" # tabs are present in this section
159+
cr = " [CR] " if "\r" in lines else "------" # \r is present in this section
161160
print(f"# ------------- {k} Section: {n} {cr}{tab}---------")
162161
n += 1
163162
print(lines)

whois/_3_adjust.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,15 @@ def __init__(
111111
if "admin" in data:
112112
self.admin = data["admin"][0].strip()
113113

114+
if "emails" in data:
115+
self.emails = sorted( # sorted added to get predictable output during test
116+
list( # list(set(...))) to deduplicate results
117+
set(
118+
[s.strip() for s in data["emails"]],
119+
),
120+
),
121+
)
122+
114123

115124
# http://docs.python.org/library/datetime.html#strftime-strptime-behavior
116125
DATE_FORMATS = [

whois/tld_regexpr.py

Lines changed: 66 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -120,15 +120,21 @@
120120

121121
ax = {
122122
"extend": "com",
123-
"domain_name": r"domain...............:\s?(.+)",
124-
"registrar": r"registrar............:\s?(.+)",
125-
"creation_date": r"created..............:\s?(.+)",
126-
"expiration_date": r"expires..............:\s?(.+)",
127-
"updated_date": r"Information Updated:\s?(.+)",
123+
"domain_name": r"domain\.+:\s*(\S+)",
124+
"registrar": r"registrar\.+:\s*(.+)",
125+
"creation_date": r"created\.+:\s*(\S+)",
126+
"expiration_date": r"expires\.+:\s*(\S+)",
127+
"updated_date": r"modified\.+:\s?(\S+)",
128+
"name_servers": r"nserver\.+:\s*(\S+)",
129+
"status": r"status\.+:\s*(\S+)",
130+
"registrant": r"Holder\s+name\.+:\s*(.+)\r?\n", # not always present see meta.ax and google.ax
131+
"registrant_country": r"country\.+:\s*(.+)\r?\n", # not always present see meta.ax and google.ax
132+
128133
}
129134

130135
aw = {
131136
"extend": "nl",
137+
"name_servers": r"Domain nameservers:\s+(\S+)[ \t]*\r?\n(?:\s+(\S+))?",
132138
}
133139

134140
# Banking TLD - ICANN
@@ -445,7 +451,8 @@
445451
"creation_date": r"Domain Name Commencement Date:\s?(.+)",
446452
"expiration_date": r"Expiry Date:\s?(.+)",
447453
"updated_date": None,
448-
"name_servers": r"Name Servers Information:\n\n(?:(\S+)\n)(?:(\S+)\n)(?:(\S+)\n)?(?:(\S+)\n)?\n?",
454+
# name servers have trailing whitespace, lines are \n only
455+
"name_servers": r"Name Servers Information:\s*(?:(\S+)[ \t]*\n)(?:(\S+)[ \t]*\n)?(?:(\S+)[ \t]*\n)?(?:(\S+)[ \t]*\n)?",
449456
"status": None,
450457
}
451458

@@ -592,15 +599,15 @@
592599
}
593600

594601
kg = {
595-
"extend": None,
596-
"domain_name": r"Domain\s(.+)\s\(",
602+
"domain_name": r"Domain\s+(\S+)",
597603
"registrar": r"Billing\sContact:\n.*\n\s+Name:\s(.+)\n",
598604
"registrant_country": None,
599605
"expiration_date": r"Record expires on:\s+(.+)",
600606
"creation_date": r"Record created:\s+(.+)",
601607
"updated_date": r"Record last updated on:\s+(.+)",
602-
"name_servers": None,
603-
"status": None,
608+
# name servers have trailing whitespace
609+
"name_servers": r"Name servers in the listed order:\n\n(?:(\S+)[ \t]*\n)(?:(\S+)[ \t]*\n)?",
610+
"status": r"Domain\s+\S+\s+\((\S+)\)",
604611
}
605612

606613
# Saint Kitts and Nevis
@@ -652,11 +659,12 @@
652659
}
653660

654661
me = {
662+
# lines have \r
655663
"extend": "biz",
656-
"creation_date": r"Domain Create Date:\s?(.+)",
657-
"expiration_date": r"Domain Expiration Date:\s?(.+)",
658-
"updated_date": r"Domain Last Updated Date:\s?(.+)",
659-
"name_servers": r"Nameservers:\s?(.+)",
664+
"creation_date": r"Creation Date:\s?(.+)",
665+
"expiration_date": r"Expiry Date:\s?(.+)",
666+
"updated_date": r"Updated Date:\s?(.+)",
667+
"name_servers": r"Name Server:\s*(\S+)\r?\n",
660668
"status": r"Domain Status:\s?(.+)",
661669
}
662670

@@ -721,19 +729,30 @@
721729
"domain_name": r"Domain name:\s?(.+)",
722730
"name_servers": (
723731
r"""(?x:
724-
Domain\ nameservers:[ \t]*\n
725-
(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n # ns1.tld.nl [A?]
726-
(?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns2.tld.nl [A?]
727-
(?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns2.tld.nl [AAAA?]
728-
(?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns3.tld.nl [A?]
729-
(?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns3.tld.nl [AAAA?]
730-
(?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns4.tld.nl [A?]
731-
(?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns4.tld.nl [AAAA?]
732-
(?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns5.tld.nl [A?]
733-
(?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns5.tld.nl [AAAA?]
734-
# Don't check for final LF; there might be even more records..
732+
Domain\ nameservers:\s+(\S+)\r?\n # the first
733+
(?:\s+(\S+)\r?\n)? # a optional 2th
734+
(?:\s+(\S+)\r?\n)? # a optional 3th
735+
(?:\s+(\S+)\r?\n)? # a optional 4th
736+
(?:\s+(\S+)\r?\n)? # a optional 5th
737+
# there may be more, best use host -t ns <domain> to get the actual nameservers
735738
)"""
736739
),
740+
# the format with [A] or [AAAA] is no longer in use
741+
# "name_servers": (
742+
# r"""(?x:
743+
# Domain\ nameservers:[ \t]*\n
744+
# (?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n # ns1.tld.nl [A?]
745+
# (?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns2.tld.nl [A?]
746+
# (?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns2.tld.nl [AAAA?]
747+
# (?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns3.tld.nl [A?]
748+
# (?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns3.tld.nl [AAAA?]
749+
# (?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns4.tld.nl [A?]
750+
# (?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns4.tld.nl [AAAA?]
751+
# (?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns5.tld.nl [A?]
752+
# (?:(?:[ \t]+) (\S+) (?:[ \t]+\S+)? \n)? # opt-ns5.tld.nl [AAAA?]
753+
# # Don't check for final LF; there might be even more records..
754+
# )"""
755+
# ),
737756
"reseller": r"Reseller:\s?(.+)",
738757
"abuse_contact": r"Abuse Contact:\s?(.+)",
739758
}
@@ -809,12 +828,13 @@
809828
}
810829

811830
pl = {
831+
# pl has lines ending in multiple carriage returns (\r) and trailing whitespace
812832
"extend": "uk",
813833
"registrar": r"\nREGISTRAR:\s*(.+)\n",
814834
"creation_date": r"\ncreated:\s*(.+)\n",
815835
"updated_date": r"\nlast modified:\s*(.+)\n",
816836
"expiration_date": r"\noption expiration date:\s*(.+)\n",
817-
"name_servers": r"\nnameservers:\s*(.+)\n\s*(.+)\n",
837+
"name_servers": r"\nnameservers:(?:\s*(\S+)[ \t\r]*\n)(?:\s*(\S+)[ \t\r]*\n)?(?:\s*(\S+)[ \t\r]*\n)?",
818838
"status": r"\nStatus:\n\s*(.+)",
819839
}
820840

@@ -823,16 +843,21 @@
823843
}
824844

825845
pt = {
846+
# mboot 2022-11-16
847+
# from aws frankfurt all ok, looks like network limitations
848+
# actually it sometimes works, most of the time though we get: connect: Network is unreachable
826849
# looks like this is now a privateRegistry mboot: 2022-06-10,
827850
# manual lookup: use the website at whois.dns.pt
828-
"_privateRegistry": True,
851+
"_server": "whois.dns.pt",
852+
# "_privateRegistry": True,
829853
"extend": "com",
830854
"domain_name": r"Domain:\s?(.+)",
831855
"registrar": None,
832856
"creation_date": r"Creation Date:\s?(.+)",
833857
"expiration_date": r"Expiration Date:\s?(.+)",
834858
"updated_date": None,
835-
"name_servers": r"Name Server:\s*(.+)",
859+
# nameservers have trailing info: Name Server: ns1.dnscpanel.com | IPv4: and IPv6:
860+
"name_servers": r"Name Server:(?:\s*(\S+)[^\n]*\n)(?:\s*(\S+)[^\n]*\n)?",
836861
"status": r"Domain Status:\s?(.+)",
837862
}
838863

@@ -910,17 +935,19 @@
910935

911936
# Singapore - Commercial sub-domain
912937
com_sg = {
938+
# lines use \r; nameservers have trailing whitespace
913939
"extend": None,
914940
"domain_name": r"Domain Name:\s?(.+)",
915941
"registrar": r"Registrar:\s?(.+)",
916-
"registrant": r"Registrant:\n\n\s?Name:\s?(.+)",
942+
"registrant": r"Registrant:\r?\n\r?\n\s*Name:\s*(.+)\r?\n",
917943
"registrant_country": None,
918944
"creation_date": r"Creation Date:\s?(.+)",
919945
"expiration_date": r"Expiration Date:\s?(.+)",
920946
"updated_date": r"Modified Date:\s?(.+)",
921-
"name_servers": r"Name Servers:\s*(.+)\s*",
922-
"status": None,
923-
"emails": r"[\w.-]+@[\w.-]+\.[\w]{2,4}",
947+
"name_servers": r"Name Servers:\r\n(?:\s*(\S+)[ \t\r]*\n)(?:\s*(\S+)[ \t\r]*\n)?(?:\s*(\S+)[ \t\r]*\n)?",
948+
"status": r"Domain Status:\s*(.*)\r\n",
949+
# "emails": r"(\S+@\S+)",
950+
"emails": r"([\w\.-]+@[\w\.-]+\.[\w])",
924951
}
925952

926953
# Slovakia
@@ -981,7 +1008,7 @@
9811008
"creation_date": r"Creation date\.+:\s?(.+)",
9821009
"expiration_date": None,
9831010
"updated_date": None,
984-
"name_servers": r"DNS servers\s?Name\.+:\s?(.+)\s*Name\.+:\s?(.+)?",
1011+
"name_servers": r"DNS servers\n(?:Name\.+:\s*(\S+)\n)(?:Name\.+:\s*(\S+)\n)?(?:Name\.+:\s*(\S+)\n)?(?:Name\.+:\s*(\S+)\n)?",
9851012
"status": r"Domain status\.+:(.+)",
9861013
"emails": r"[\w.-]+@[\w.-]+\.[\w]{2,4}",
9871014
}
@@ -1047,7 +1074,7 @@
10471074
"creation_date": r"Registered on:\s*(.+)",
10481075
"expiration_date": r"Expiry date:\s*(.+)",
10491076
"updated_date": r"Last updated:\s*(.+)",
1050-
"name_servers": r"Name Servers:\s*(.+)\s*",
1077+
"name_servers": r"Name Servers:\s*(\S+)\r?\n(?:\s+(\S+)\r?\n)?(?:\s+(\S+)\r?\n)?(?:\s+(\S+)\r?\n)?",
10511078
"status": r"Registration status:\n\s*(.+)",
10521079
}
10531080

@@ -1664,7 +1691,7 @@
16641691
"updated_date": r"\s+Modified Date:\s+(.+)",
16651692
"status": r"\s+Domain Status:\s(.+)",
16661693
"registrant_country": None,
1667-
"name_servers": None, # actually a multi line match: TODO
1694+
"name_servers": r"Name Servers:\s+(\S+)[ \t]*\r?\n\s+(\S+)[ \t]*\r?\n\s+(\S+)",
16681695
}
16691696

16701697
srl = {
@@ -1692,17 +1719,15 @@
16921719
"registrar": r"Registration\s+Service\s+Provider:\s+(.+)",
16931720
"updated_date": None,
16941721
"registrant_country": None,
1695-
"name_servers": None,
1722+
"name_servers": r"Domain servers in listed order:\s*(\S+)[ \t]*\r?\n(?:\s+(\S+)[ \t]*\r?\n)?(?:\s+(\S+)[ \t]*\r?\n)?(?:\s+(\S+)[ \t]*\r?\n)?",
16961723
}
16971724

1698-
16991725
com_tw = {
1700-
"_server": "tw",
1726+
"extend": "tw",
17011727
}
17021728

17031729
ug = {
17041730
"_server": "whois.co.ug",
1705-
"extend": None,
17061731
"domain_name": r"Domain name:\s+(.+)",
17071732
"creation_date": r"Registered On:\s+(.+)",
17081733
"expiration_date": r"Expires On:\s+(.+)",
@@ -1905,11 +1930,12 @@
19051930
onion = {"extend": "_privateReg"}
19061931

19071932
# backend registry for domain names ending in GG, JE, and AS.
1933+
# lines may actually have \r before \n; updated so that all 3 domains return all nameservers
19081934
gg = {
19091935
"domain_name": r"Domain:\s*\n\s+(.+)",
19101936
"status": r"Domain Status:\s*\n\s+(.+)",
19111937
"registrar": r"Registrar:\s*\n\s+(.+)",
1912-
"name_servers": r"Name servers:\s*\n\s+(.+)\n\s+(.+)",
1938+
"name_servers": r"Name servers:\s*\r?\n(?:\s+(\S+)\r?\n)(?:\s+(\S+)\r?\n)?(?:\s+(\S+)\r?\n)?",
19131939
"creation_date": r"Relevant dates:\s*\n\s+Registered on(.+)",
19141940
"expiration_date": None,
19151941
"updated_date": None,

0 commit comments

Comments
 (0)