|
38 | 38 | forceDownloadTld=forceDownloadTld, |
39 | 39 | ) |
40 | 40 |
|
| 41 | +# get python whois known TLDs and second-level domains |
41 | 42 | known = sorted(whois.validTlds()) |
42 | 43 |
|
| 44 | +# get the IANA TLD list |
43 | 45 | URL = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt" |
44 | 46 | response = urllib.request.urlopen(URL) |
45 | 47 | data = response.read().decode("utf-8").lower() |
46 | 48 | dataList = sorted(data.splitlines()) |
47 | 49 |
|
| 50 | +# report python_whois TLD names (those without a dot) that are not in the IANA list |
48 | 51 | for name in known: |
49 | | - # print(name) |
50 | 52 | if name in dataList: |
51 | | - del dataList[dataList.index(name)] |
52 | | - |
53 | | -# Try to auto detect new domaisn via IANA and some known common regex lists like .com |
54 | | - |
| 53 | + continue |
| 54 | + if "." in name: |
| 55 | + continue |
| 56 | + if name not in dataList: |
| 57 | + print(f"{name} tld name from python_whois is not known in IANA list") |
| 58 | + continue |
| 59 | + |
| 60 | +dataList2 = [] |
| 61 | +for name in dataList: |
| 62 | + if name in known: |
| 63 | + continue |
| 64 | + dataList2.append(name) |
| 65 | + |
| 66 | +# Try to auto detect new domains via IANA and some known common regex lists like .com |
55 | 67 | found = {} |
56 | | -for tld in dataList: |
| 68 | +for tld in dataList2: |
57 | 69 | data, status = i.getInfoOnOneTld(tld) |
58 | | - # print(data) |
| 70 | + # print(status, data) |
59 | 71 |
|
60 | 72 | if data and "whois" in data and data["whois"] and data["whois"] != "NULL": |
61 | 73 | wh = data["whois"] |
62 | | - # print(tld, wh, data, status) |
63 | 74 | if wh.endswith(f".{tld}"): |
64 | 75 | dd = wh.split(".")[-2:] |
65 | 76 | else: |
66 | 77 | dd = ["meta", tld] |
67 | 78 |
|
68 | | - # print(dd) |
69 | 79 | zz = _do_whois_query( |
70 | 80 | dd, |
71 | 81 | ignore_returncode=False, |
72 | 82 | server=wh, |
73 | 83 | ) |
74 | | - # print(zz) |
75 | 84 |
|
76 | 85 | pp = {"_server": wh, "extend": "com"} |
77 | 86 | aDictToTestOverride = {tld: pp} |
|
87 | 96 | except Exception as e: |
88 | 97 | print(e) |
89 | 98 |
|
90 | | -for tld in found: |
91 | | - print(f"## ZZ['{tld}'] = {found[tld]} # auto-detected via IANA tld") |
92 | | - |
93 | | -# TODO |
94 | | -# also make a list of all tld (without dot in them) that no longer exists in iana, we can remove them |
| 99 | + else: |
| 100 | + print(f"no whois info for tld: {tld}\n", data) |
0 commit comments