Skip to content

Commit a642651

Browse files
committed
IDEV-1996: Implement domaindiscovery python wrapper endpoint.
1 parent f081335 commit a642651

File tree

5 files changed

+116
-10
lines changed

5 files changed

+116
-10
lines changed

domaintools/api.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from hmac import new as hmac
44
import re
55

6+
from domaintools.constants import Endpoint, ENDPOINT_TO_SOURCE_MAP, OutputFormat
67
from domaintools._version import current as version
78
from domaintools.results import (
89
GroupedIterable,
@@ -18,6 +19,8 @@
1819
filter_by_field,
1920
DTResultFilter,
2021
)
22+
from domaintools.utils import validate_feeds_required_parameters
23+
2124

2225
AVAILABLE_KEY_SIGN_HASHES = ["sha1", "sha256", "md5"]
2326

@@ -1088,15 +1091,28 @@ def nad(self, **kwargs):
10881091

10891092
def domainrdap(self, **kwargs):
10901093
"""Returns changes to global domain registration information, populated by the Registration Data Access Protocol (RDAP)"""
1091-
sessionID = kwargs.get("sessionID")
1092-
after = kwargs.get("after")
1093-
before = kwargs.get("before")
1094-
if not (sessionID or after or before):
1095-
raise ValueError("sessionID or after or before must be defined")
1094+
validate_feeds_required_parameters(kwargs)
1095+
endpoint = kwargs.pop("endpoint", Endpoint.FEED.value)
1096+
source = ENDPOINT_TO_SOURCE_MAP.get(endpoint)
1097+
1098+
return self._results(
1099+
f"domain-registration-data-access-protocol-feed-({source.value})",
1100+
f"v1/{endpoint}/domainrdap/",
1101+
response_path=(),
1102+
**kwargs,
1103+
)
1104+
1105+
def domaindiscovery(self, **kwargs):
1106+
"""Returns new domains as they are either discovered in domain registration information, observed by our global sensor network, or reported by trusted third parties"""
1107+
validate_feeds_required_parameters(kwargs)
1108+
endpoint = kwargs.pop("endpoint", Endpoint.FEED.value)
1109+
source = ENDPOINT_TO_SOURCE_MAP.get(endpoint)
1110+
if endpoint == Endpoint.DOWNLOAD.value or kwargs.get("output_format", OutputFormat.JSONL.value) != OutputFormat.CSV.value:
1111+
kwargs.pop("headers", None)
10961112

10971113
return self._results(
1098-
"domain-registration-data-access-protocol-feed-(api)",
1099-
"v1/feed/domainrdap/",
1114+
f"real-time-domain-discovery-feed-({source.value})",
1115+
f"v1/{endpoint}/domaindiscovery/",
11001116
response_path=(),
11011117
**kwargs,
11021118
)

domaintools/base_results.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44
import re
55
import time
66
import logging
7+
8+
from copy import deepcopy
79
from datetime import datetime
10+
from httpx import Client
811

12+
from domaintools.constants import OutputFormat, HEADER_ACCEPT_KEY_CSV_FORMAT
913
from domaintools.exceptions import (
1014
BadRequestException,
1115
InternalServerErrorException,
@@ -18,7 +22,6 @@
1822
)
1923
from domaintools.utils import get_feeds_products_list
2024

21-
from httpx import Client
2225

2326
try: # pragma: no cover
2427
from collections.abc import MutableMapping, MutableSequence
@@ -90,6 +93,18 @@ def _make_request(self):
9093
patch_data = self.kwargs.copy()
9194
patch_data.update(self.api.extra_request_params)
9295
return session.patch(url=self.url, json=patch_data)
96+
elif self.product in get_feeds_products_list():
97+
parameters = deepcopy(self.kwargs)
98+
parameters.pop("output_format", None)
99+
parameters.pop(
100+
"format", None
101+
) # For unknown reasons, "format" is populated even when it is not included in the CLI params for the feeds endpoint, so we need to remove it. This happens only when using the CLI.
102+
headers = {}
103+
if self.kwargs.get("output_format", OutputFormat.JSONL.value) == OutputFormat.CSV.value:
104+
parameters["headers"] = int(bool(self.kwargs.get("headers", False)))
105+
headers["accept"] = HEADER_ACCEPT_KEY_CSV_FORMAT
106+
107+
return session.get(url=self.url, params=parameters, headers=headers, **self.api.extra_request_params)
93108
else:
94109
return session.get(url=self.url, params=self.kwargs, **self.api.extra_request_params)
95110

@@ -259,6 +274,32 @@ def json(self):
259274
**self.kwargs,
260275
)
261276

277+
@property
278+
def jsonl(self):
279+
self.kwargs.pop("format", None)
280+
return self.__class__(
281+
format="jsonl",
282+
product=self.product,
283+
url=self.url,
284+
items_path=self.items_path,
285+
response_path=self.response_path,
286+
api=self.api,
287+
**self.kwargs,
288+
)
289+
290+
@property
291+
def csv(self):
292+
self.kwargs.pop("format", None)
293+
return self.__class__(
294+
format="csv",
295+
product=self.product,
296+
url=self.url,
297+
items_path=self.items_path,
298+
response_path=self.response_path,
299+
api=self.api,
300+
**self.kwargs,
301+
)
302+
262303
@property
263304
def xml(self):
264305
self.kwargs.pop("format", None)

domaintools/constants.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from enum import Enum
2+
3+
4+
class Endpoint(Enum):
5+
FEED = "feed"
6+
DOWNLOAD = "download"
7+
8+
9+
class Source(Enum):
10+
API = "api"
11+
S3 = "s3"
12+
13+
14+
class OutputFormat(Enum):
15+
JSONL = "jsonl"
16+
CSV = "csv"
17+
18+
19+
HEADER_ACCEPT_KEY_CSV_FORMAT = "text/csv"
20+
21+
ENDPOINT_TO_SOURCE_MAP = {
22+
Endpoint.FEED.value: Source.API,
23+
Endpoint.DOWNLOAD.value: Source.S3,
24+
}

domaintools/utils.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,4 +176,15 @@ def get_feeds_products_list():
176176
"newly-active-domains-feed-(api)",
177177
"newly-observed-domains-feed-(api)",
178178
"domain-registration-data-access-protocol-feed-(api)",
179+
"domain-registration-data-access-protocol-feed-(s3)",
180+
"real-time-domain-discovery-feed-(api)",
181+
"real-time-domain-discovery-feed-(s3)",
179182
]
183+
184+
185+
def validate_feeds_required_parameters(params):
186+
sessionID = params.get("sessionID")
187+
after = params.get("after")
188+
before = params.get("before")
189+
if not (sessionID or after or before):
190+
raise ValueError("sessionID or after or before must be defined")

domaintools_async/__init__.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
"""Adds async capabilities to the base product object"""
22

33
import asyncio
4+
5+
from copy import deepcopy
46
from httpx import AsyncClient
57

68
from domaintools.base_results import Results
7-
8-
from domaintools.exceptions import ServiceUnavailableException, ServiceException
9+
from domaintools.constants import OutputFormat, HEADER_ACCEPT_KEY_CSV_FORMAT
10+
from domaintools.exceptions import ServiceUnavailableException
11+
from domaintools.utils import get_feeds_products_list
912

1013

1114
class _AIter(object):
@@ -49,6 +52,17 @@ async def _make_async_request(self, session):
4952
patch_data = self.kwargs.copy()
5053
patch_data.update(self.api.extra_request_params)
5154
results = await session.patch(url=self.url, json=patch_data)
55+
elif self.product in get_feeds_products_list():
56+
parameters = deepcopy(self.kwargs)
57+
parameters.pop("output_format", None)
58+
parameters.pop(
59+
"format", None
60+
) # For unknown reasons, "format" is populated even when it is not included in the CLI params for the feeds endpoint, so we need to remove it. This happens only when using the CLI.
61+
headers = {}
62+
if self.kwargs.get("output_format", OutputFormat.JSONL.value) == OutputFormat.CSV.value:
63+
parameters["headers"] = int(bool(self.kwargs.get("headers", False)))
64+
headers["accept"] = HEADER_ACCEPT_KEY_CSV_FORMAT
65+
results = await session.get(url=self.url, params=parameters, headers=headers, **self.api.extra_request_params)
5266
else:
5367
results = await session.get(url=self.url, params=self.kwargs, **self.api.extra_request_params)
5468
if results:

0 commit comments

Comments
 (0)