Skip to content

Commit 786873a

Browse files
Merge pull request #102 from ChuckWoodraska/ID-1385
ID-1385 add data pruner utility function
2 parents 7c10602 + 4211f01 commit 786873a

File tree

5 files changed

+203
-1
lines changed

5 files changed

+203
-1
lines changed

domaintools/utils.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,3 +73,28 @@ def get_average_age(domains):
7373
total += get_domain_age(str(d.get("create_date")))
7474

7575
return total // count if count else None
76+
77+
78+
def prune_data(data_obj):
    """
    Does a deep dive through a data object to prune any null or empty items, in place.

    Empty strings, empty lists, empty dicts and ``None`` are removed, as are
    containers that become empty once their own contents have been pruned.
    Integers (and therefore booleans) are always kept, with one exception:
    a dict entry whose key is ``'count'`` and whose value equals 0 is removed.

    Args:
        data_obj: Either a list or dict that needs to be pruned. Any other
            type is left untouched.

    Returns:
        None. ``data_obj`` is modified in place.
    """
    if isinstance(data_obj, dict):
        # Collect keys first: a dict must not change size while it is iterated.
        keys_to_prune = []
        for k, v in data_obj.items():
            if isinstance(v, (dict, list)):
                # Prune children first so a container left empty by pruning
                # is itself seen as empty by the check below.
                prune_data(v)
            if not isinstance(v, int) and not v:
                keys_to_prune.append(k)
            elif k == 'count' and v == 0:
                keys_to_prune.append(k)
        for k in keys_to_prune:
            del data_obj[k]
    elif isinstance(data_obj, list):
        kept = []
        for item in data_obj:
            prune_data(item)
            # Same rule as the dict branch: ints are always kept, anything
            # else is kept only when truthy. No len() call, so numeric list
            # items no longer raise TypeError.
            if isinstance(item, int) or item:
                kept.append(item)
        # Slice-assign so callers holding a reference see the pruned list.
        data_obj[:] = kept

tests/responses/__init__.py

Whitespace-only changes.

tests/responses/expected_data.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
def prune_espn_expected():
    """
    Return the espn.com sample response as it should look after pruning.

    This is the expected result of running ``prune_data`` over the raw espn.com
    fixture: every empty string, empty list, zero ``count`` and container left
    empty by those removals is absent. A fresh dict is built on every call, so
    callers may mutate the result freely.
    """
    return {
        'has_more_results': False,
        'limit_exceeded': False,
        'message': 'Enjoy your data.',
        'results': [{
            'active': True,
            'additional_whois_email': [{'count': 1573632,
                                        'value': '[email protected]'}],
            'admin_contact': {
                'city': {'count': 110095, 'value': 'Bristol'},
                'country': {'count': 189951325, 'value': 'us'},
                'email': [{'count': 1356, 'value': '[email protected]'}],
                'fax': {'count': 1379, 'value': '18607664502'},
                'name': {'count': 1, 'value': 'ESPN, Inc.'},
                'org': {'count': 1, 'value': 'ESPN, Inc.'},
                'phone': {'count': 1034, 'value': '18607662000'},
                'postal': {'count': 1387, 'value': '06010-1001'},
                'state': {'count': 560553, 'value': 'CT'},
            },
            'alexa': 183,
            'create_date': {'count': 31, 'value': '1994-10-04'},
            'data_updated_timestamp': '2020-04-14T09:09:06',
            'domain': 'espn.com',
            # risk_score 0 survives pruning: only a 'count' of 0 is dropped.
            'domain_risk': {'components': [{'name': 'allowlist',
                                            'risk_score': 0}],
                            'risk_score': 0},
            'email_domain': [{'count': 1420, 'value': 'espn.com'},
                             {'count': 1813887, 'value': 'cscglobal.com'}],
            'expiration_date': {'count': 729160, 'value': '2020-10-03'},
            'ip': [
                {'address': {'count': 5833, 'value': '13.224.13.26'},
                 'asn': [{'count': 15806987, 'value': 16509}],
                 'country_code': {'count': 333903806, 'value': 'us'},
                 'isp': {'count': 14091796,
                         'value': 'Amazon Technologies Inc.'}},
                {'address': {'count': 5912, 'value': '13.224.13.62'},
                 'asn': [{'count': 15806987, 'value': 16509}],
                 'country_code': {'count': 333903806, 'value': 'us'},
                 'isp': {'count': 14091796,
                         'value': 'Amazon Technologies Inc.'}},
                {'address': {'count': 5953, 'value': '13.224.13.66'},
                 'asn': [{'count': 15806987, 'value': 16509}],
                 'country_code': {'count': 333903806, 'value': 'us'},
                 'isp': {'count': 14091796,
                         'value': 'Amazon Technologies Inc.'}},
                {'address': {'count': 6197, 'value': '13.224.13.80'},
                 'asn': [{'count': 15806987, 'value': 16509}],
                 'country_code': {'count': 333903806, 'value': 'us'},
                 'isp': {'count': 14091796,
                         'value': 'Amazon Technologies Inc.'}},
                {'address': {'count': 5879, 'value': '99.86.32.125'},
                 'asn': [{'count': 15806987, 'value': 16509}],
                 'country_code': {'count': 333903806, 'value': 'us'},
                 'isp': {'count': 5530009, 'value': 'Amazon.com Inc.'}},
                {'address': {'count': 5794, 'value': '99.86.32.27'},
                 'asn': [{'count': 15806987, 'value': 16509}],
                 'country_code': {'count': 333903806, 'value': 'us'},
                 'isp': {'count': 5530009, 'value': 'Amazon.com Inc.'}},
                {'address': {'count': 5452, 'value': '99.86.32.32'},
                 'asn': [{'count': 15806987, 'value': 16509}],
                 'country_code': {'count': 333903806, 'value': 'us'},
                 'isp': {'count': 5530009, 'value': 'Amazon.com Inc.'}},
                {'address': {'count': 6100, 'value': '99.86.32.4'},
                 'asn': [{'count': 15806987, 'value': 16509}],
                 'country_code': {'count': 333903806, 'value': 'us'},
                 'isp': {'count': 5530009, 'value': 'Amazon.com Inc.'}},
            ],
            'mx': [{'domain': {'count': 9083544, 'value': 'outlook.com'},
                    'host': {'count': 1,
                             'value': 'espn-com.mail.protection.outlook.com'},
                    'ip': [{'count': 435650, 'value': '104.47.45.36'},
                           {'count': 432743, 'value': '104.47.44.36'}],
                    'priority': 10}],
            'name_server': [
                {'domain': {'count': 52299, 'value': 'awsdns-02.org'},
                 'host': {'count': 5945, 'value': 'ns-1045.awsdns-02.org'},
                 'ip': [{'count': 6052, 'value': '205.251.196.21'}]},
                {'domain': {'count': 49898, 'value': 'awsdns-15.com'},
                 'host': {'count': 6026, 'value': 'ns-122.awsdns-15.com'},
                 'ip': [{'count': 6199, 'value': '205.251.192.122'}]},
                {'domain': {'count': 8440243, 'value': 'co.uk'},
                 'host': {'count': 6183, 'value': 'ns-1936.awsdns-50.co.uk'},
                 'ip': [{'count': 6273, 'value': '205.251.199.144'}]},
                {'domain': {'count': 48670, 'value': 'awsdns-41.net'},
                 'host': {'count': 6826, 'value': 'ns-846.awsdns-41.net'},
                 'ip': [{'count': 6838, 'value': '205.251.195.78'}]},
            ],
            'redirect': {'count': 454, 'value': 'espn.go.com'},
            'redirect_domain': {'count': 62216, 'value': 'go.com'},
            'registrant_contact': {
                'city': {'count': 110095, 'value': 'Bristol'},
                'country': {'count': 189951325, 'value': 'us'},
                'email': [{'count': 1354, 'value': '[email protected]'}],
                'fax': {'count': 1379, 'value': '18607664502'},
                'name': {'count': 1, 'value': 'ESPN, Inc.'},
                'org': {'count': 1, 'value': 'ESPN, Inc.'},
                'phone': {'count': 1034, 'value': '18607662000'},
                'postal': {'count': 1387, 'value': '06010-1001'},
                'state': {'count': 560553, 'value': 'CT'},
            },
            'registrant_name': {'count': 1, 'value': 'ESPN, Inc.'},
            'registrant_org': {'count': 1, 'value': 'ESPN, Inc.'},
            'registrar': {'count': 1, 'value': 'CSC CORPORATE DOMAINS, INC.'},
            # One list element: the adjacent literals concatenate.
            'registrar_status': ['clienttransferprohibited '
                                 'serverdeleteprohibited '
                                 'servertransferprohibited '
                                 'serverupdateprohibited'],
            'soa_email': [{'count': 3232212,
                           'value': '[email protected]'}],
            'spf_info': 'v=spf1 include:servers.mcsv.net mx '
                        'ip4:74.123.203.125 ip4:74.123.200.120 '
                        'ip4:74.123.200.35 ip4:74.123.200.36 '
                        'ip4:74.123.203.98 ip4:74.123.200.222 '
                        'include:_spf.emailcampaigns.net '
                        'include:spf1.worldapp.com ~all',
            'ssl_info': [
                {'hash': {'count': 2,
                          'value': 'f56f5316dc0b40c8ec6e70221d38b2551e90e85a'},
                 'organization': {'count': 9505,
                                  'value': 'The Walt Disney '
                                           'Company'},
                 'subject': {'count': 2,
                             'value': 'CN=espn.com,O=The Walt '
                                      'Disney '
                                      'Company,L=Burbank,ST=California,C=US'}},
                {'hash': {'count': 1,
                          'value': 'a5b3d9312bce1157b61d53d864d33ee2b226d4e2'},
                 'subject': {'count': 1,
                             'value': 'CN=redirect.espn.com'}},
            ],
            'technical_contact': {
                'city': {'count': 110095, 'value': 'Bristol'},
                'country': {'count': 189951325, 'value': 'us'},
                'email': [{'count': 1355, 'value': '[email protected]'}],
                'fax': {'count': 1379, 'value': '18607664502'},
                'name': {'count': 1, 'value': 'ESPN, Inc.'},
                'org': {'count': 1, 'value': 'ESPN, Inc.'},
                'phone': {'count': 1034, 'value': '18607662000'},
                'postal': {'count': 1387, 'value': '06010-1001'},
                'state': {'count': 560553, 'value': 'CT'},
            },
            'tld': 'com',
            'website_response': 500,
            'whois_url': 'https://whois.domaintools.com/espn.com',
        }],
        'results_count': 1,
        'total_count': 1,
    }
tests/responses/iris_investage_data.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
import json
2+
3+
4+
def espn():
    """
    Return the raw, unpruned espn.com sample response as a parsed dict.

    The payload is kept on a single line on purpose: a line break inside any
    JSON string value is an invalid control character and makes
    ``json.loads`` raise. A new dict is parsed on every call, so callers may
    mutate the result.
    """
    json_response = """
{ "limit_exceeded": false, "has_more_results": false, "message": "Enjoy your data.", "results_count": 1, "total_count": 1, "results": [ { "domain": "espn.com", "whois_url": "https://whois.domaintools.com/espn.com", "adsense": { "value": "", "count": 0 }, "alexa": 183, "active": true, "google_analytics": { "value": "", "count": 0 }, "admin_contact": { "name": { "value": "ESPN, Inc.", "count": 1 }, "org": { "value": "ESPN, Inc.", "count": 1 }, "street": { "value": "", "count": 0 }, "city": { "value": "Bristol", "count": 110095 }, "state": { "value": "CT", "count": 560553 }, "postal": { "value": "06010-1001", "count": 1387 }, "country": { "value": "us", "count": 189951325 }, "phone": { "value": "18607662000", "count": 1034 }, "fax": { "value": "18607664502", "count": 1379 }, "email": [ { "value": "[email protected]", "count": 1356 } ] }, "billing_contact": { "name": { "value": "", "count": 0 }, "org": { "value": "", "count": 0 }, "street": { "value": "", "count": 0 }, "city": { "value": "", "count": 0 }, "state": { "value": "", "count": 0 }, "postal": { "value": "", "count": 0 }, "country": { "value": "", "count": 0 }, "phone": { "value": "", "count": 0 }, "fax": { "value": "", "count": 0 }, "email": [] }, "registrant_contact": { "name": { "value": "ESPN, Inc.", "count": 1 }, "org": { "value": "ESPN, Inc.", "count": 1 }, "street": { "value": "", "count": 0 }, "city": { "value": "Bristol", "count": 110095 }, "state": { "value": "CT", "count": 560553 }, "postal": { "value": "06010-1001", "count": 1387 }, "country": { "value": "us", "count": 189951325 }, "phone": { "value": "18607662000", "count": 1034 }, "fax": { "value": "18607664502", "count": 1379 }, "email": [ { "value": "[email protected]", "count": 1354 } ] }, "technical_contact": { "name": { "value": "ESPN, Inc.", "count": 1 }, "org": { "value": "ESPN, Inc.", "count": 1 }, "street": { "value": "", "count": 0 }, "city": { "value": "Bristol", "count": 110095 }, "state": { "value": "CT", "count": 560553 }, "postal": { "value": "06010-1001", "count": 1387 }, "country": { "value": "us", "count": 189951325 }, "phone": { "value": "18607662000", "count": 1034 }, "fax": { "value": "18607664502", "count": 1379 }, "email": [ { "value": "[email protected]", "count": 1355 } ] }, "create_date": { "value": "1994-10-04", "count": 31 }, "expiration_date": { "value": "2020-10-03", "count": 729160 }, "email_domain": [ { "value": "espn.com", "count": 1420 }, { "value": "cscglobal.com", "count": 1813887 } ], "soa_email": [ { "value": "[email protected]", "count": 3232212 } ], "ssl_email": [], "additional_whois_email": [ { "value": "[email protected]", "count": 1573632 } ], "ip": [ { "address": { "value": "13.224.13.26", "count": 5833 }, "asn": [ { "value": 16509, "count": 15806987 } ], "country_code": { "value": "us", "count": 333903806 }, "isp": { "value": "Amazon Technologies Inc.", "count": 14091796 } }, { "address": { "value": "13.224.13.62", "count": 5912 }, "asn": [ { "value": 16509, "count": 15806987 } ], "country_code": { "value": "us", "count": 333903806 }, "isp": { "value": "Amazon Technologies Inc.", "count": 14091796 } }, { "address": { "value": "13.224.13.66", "count": 5953 }, "asn": [ { "value": 16509, "count": 15806987 } ], "country_code": { "value": "us", "count": 333903806 }, "isp": { "value": "Amazon Technologies Inc.", "count": 14091796 } }, { "address": { "value": "13.224.13.80", "count": 6197 }, "asn": [ { "value": 16509, "count": 15806987 } ], "country_code": { "value": "us", "count": 333903806 }, "isp": { "value": "Amazon Technologies Inc.", "count": 14091796 } }, { "address": { "value": "99.86.32.125", "count": 5879 }, "asn": [ { "value": 16509, "count": 15806987 } ], "country_code": { "value": "us", "count": 333903806 }, "isp": { "value": "Amazon.com Inc.", "count": 5530009 } }, { "address": { "value": "99.86.32.27", "count": 5794 }, "asn": [ { "value": 16509, "count": 15806987 } ], "country_code": { "value": "us", "count": 333903806 }, "isp": { "value": "Amazon.com Inc.", "count": 5530009 } }, { "address": { "value": "99.86.32.32", "count": 5452 }, "asn": [ { "value": 16509, "count": 15806987 } ], "country_code": { "value": "us", "count": 333903806 }, "isp": { "value": "Amazon.com Inc.", "count": 5530009 } }, { "address": { "value": "99.86.32.4", "count": 6100 }, "asn": [ { "value": 16509, "count": 15806987 } ], "country_code": { "value": "us", "count": 333903806 }, "isp": { "value": "Amazon.com Inc.", "count": 5530009 } } ], "mx": [ { "host": { "value": "espn-com.mail.protection.outlook.com", "count": 1 }, "domain": { "value": "outlook.com", "count": 9083544 }, "ip": [ { "value": "104.47.45.36", "count": 435650 }, { "value": "104.47.44.36", "count": 432743 } ], "priority": 10 } ], "name_server": [ { "host": { "value": "ns-1045.awsdns-02.org", "count": 5945 }, "domain": { "value": "awsdns-02.org", "count": 52299 }, "ip": [ { "value": "205.251.196.21", "count": 6052 } ] }, { "host": { "value": "ns-122.awsdns-15.com", "count": 6026 }, "domain": { "value": "awsdns-15.com", "count": 49898 }, "ip": [ { "value": "205.251.192.122", "count": 6199 } ] }, { "host": { "value": "ns-1936.awsdns-50.co.uk", "count": 6183 }, "domain": { "value": "co.uk", "count": 8440243 }, "ip": [ { "value": "205.251.199.144", "count": 6273 } ] }, { "host": { "value": "ns-846.awsdns-41.net", "count": 6826 }, "domain": { "value": "awsdns-41.net", "count": 48670 }, "ip": [ { "value": "205.251.195.78", "count": 6838 } ] } ], "domain_risk": { "risk_score": 0, "components": [ { "name": "allowlist", "risk_score": 0 } ] }, "redirect": { "value": "espn.go.com", "count": 454 }, "redirect_domain": { "value": "go.com", "count": 62216 }, "registrant_name": { "value": "ESPN, Inc.", "count": 1 }, "registrant_org": { "value": "ESPN, Inc.", "count": 1 }, "registrar": { "value": "CSC CORPORATE DOMAINS, INC.", "count": 1 }, "registrar_status": [ "clienttransferprohibited serverdeleteprohibited servertransferprohibited serverupdateprohibited" ], "spf_info": "v=spf1 include:servers.mcsv.net mx ip4:74.123.203.125 ip4:74.123.200.120 ip4:74.123.200.35 ip4:74.123.200.36 ip4:74.123.203.98 ip4:74.123.200.222 include:_spf.emailcampaigns.net include:spf1.worldapp.com ~all", "ssl_info": [ { "hash": { "value": "f56f5316dc0b40c8ec6e70221d38b2551e90e85a", "count": 2 }, "subject": { "value": "CN=espn.com,O=The Walt Disney Company,L=Burbank,ST=California,C=US", "count": 2 }, "organization": { "value": "The Walt Disney Company", "count": 9505 }, "email": [] }, { "hash": { "value": "a5b3d9312bce1157b61d53d864d33ee2b226d4e2", "count": 1 }, "subject": { "value": "CN=redirect.espn.com", "count": 1 }, "organization": { "value": "", "count": 0 }, "email": [] } ], "tld": "com", "website_response": 500, "data_updated_timestamp": "2020-04-14T09:09:06", "tags": [] } ], "missing_domains": [] }
"""

    return json.loads(json_response)

tests/test_utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
from datetime import datetime, timedelta
22
from tests.settings import utils
3+
from tests.responses.iris_investage_data import espn
4+
from tests.responses.expected_data import prune_espn_expected
5+
36

47
def test_get_domain_age():
58
create_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
@@ -102,4 +105,9 @@ def test_detect_average_age():
102105

103106
domains = []
104107
result = utils.get_average_risk_score(domains)
105-
assert result == None
108+
assert result == None
109+
110+
def test_data_prune():
    """Pruning the raw espn.com response must yield exactly the expected pruned fixture."""
    response = espn()
    expected = prune_espn_expected()
    # prune_data works in place, so the fixture itself is compared afterwards.
    utils.prune_data(response)
    assert response == expected

0 commit comments

Comments
 (0)