Skip to content

Commit 3c2d37b

Browse files
authored
Revert "IDEV-2020: Handle partial response from RTUF endpoints."
1 parent bb0e087 commit 3c2d37b

File tree

7 files changed

+99
-44215
lines changed

7 files changed

+99
-44215
lines changed

domaintools/base_results.py

Lines changed: 20 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -77,53 +77,7 @@ def _wait_time(self):
7777

7878
return wait_for
7979

80-
def _get_feeds_results_generator(self, parameters, headers):
81-
with Client(verify=self.api.verify_ssl, proxy=self.api.proxy_url, timeout=None) as session:
82-
status_code = None
83-
while status_code != 200:
84-
resp_data = session.get(url=self.url, params=parameters, headers=headers, **self.api.extra_request_params)
85-
status_code = resp_data.status_code
86-
self.setStatus(status_code, resp_data)
87-
88-
# Check limit exceeded here
89-
if "response" in resp_data.text and "limit_exceeded" in resp_data.text:
90-
self._limit_exceeded = True
91-
self._limit_exceeded_message = "limit exceeded"
92-
93-
yield resp_data
94-
95-
if self._limit_exceeded:
96-
raise ServiceException(503, "Limit Exceeded{}".format(self._limit_exceeded_message))
97-
98-
if not self.kwargs.get("sessionID"):
99-
# We only make iterative requests for queries that have a sessionID.
100-
# Otherwise, the requests would loop indefinitely when no sessionID is provided but the requested data exceeds the maximum (1 hour of data).
101-
break
102-
103-
def _get_session_params(self):
104-
parameters = deepcopy(self.kwargs)
105-
parameters.pop("output_format", None)
106-
parameters.pop(
107-
"format", None
108-
) # For some unknown reason, "format" is populated for the feeds endpoint even when it is not included in the CLI params, so we need to remove it. This happens only when using the CLI.
109-
headers = {}
110-
if self.kwargs.get("output_format", OutputFormat.JSONL.value) == OutputFormat.CSV.value:
111-
parameters["headers"] = int(bool(self.kwargs.get("headers", False)))
112-
headers["accept"] = HEADER_ACCEPT_KEY_CSV_FORMAT
113-
114-
header_api_key = parameters.pop("X-Api-Key", None)
115-
if header_api_key:
116-
headers["X-Api-Key"] = header_api_key
117-
118-
return {"parameters": parameters, "headers": headers}
119-
12080
def _make_request(self):
121-
if self.product in FEEDS_PRODUCTS_LIST:
122-
session_params = self._get_session_params()
123-
parameters = session_params.get("parameters")
124-
headers = session_params.get("headers")
125-
126-
return self._get_feeds_results_generator(parameters=parameters, headers=headers)
12781

12882
with Client(verify=self.api.verify_ssl, proxy=self.api.proxy_url, timeout=None) as session:
12983
if self.product in [
@@ -138,15 +92,30 @@ def _make_request(self):
13892
patch_data = self.kwargs.copy()
13993
patch_data.update(self.api.extra_request_params)
14094
return session.patch(url=self.url, json=patch_data)
95+
elif self.product in FEEDS_PRODUCTS_LIST:
96+
parameters = deepcopy(self.kwargs)
97+
parameters.pop("output_format", None)
98+
parameters.pop(
99+
"format", None
100+
) # For some unknown reason, "format" is populated for the feeds endpoint even when it is not included in the CLI params, so we need to remove it. This happens only when using the CLI.
101+
headers = {}
102+
if self.kwargs.get("output_format", OutputFormat.JSONL.value) == OutputFormat.CSV.value:
103+
parameters["headers"] = int(bool(self.kwargs.get("headers", False)))
104+
headers["accept"] = HEADER_ACCEPT_KEY_CSV_FORMAT
105+
106+
header_api_key = parameters.pop("X-Api-Key", None)
107+
if header_api_key:
108+
headers["X-Api-Key"] = header_api_key
109+
110+
return session.get(url=self.url, params=parameters, headers=headers, **self.api.extra_request_params)
141111
else:
142112
return session.get(url=self.url, params=self.kwargs, **self.api.extra_request_params)
143113

144114
def _get_results(self):
145115
wait_for = self._wait_time()
146116
if self.api.rate_limit and (wait_for is None or self.product == "account-information"):
147117
data = self._make_request()
148-
status_code = data.status_code if self.product not in FEEDS_PRODUCTS_LIST else 200
149-
if status_code == 503: # pragma: no cover
118+
if data.status_code == 503: # pragma: no cover
150119
sleeptime = 60
151120
log.info(
152121
"503 encountered for [%s] - sleeping [%s] seconds before retrying request.",
@@ -166,15 +135,12 @@ def _get_results(self):
166135
def data(self):
167136
if self._data is None:
168137
results = self._get_results()
169-
status_code = results.status_code if self.product not in FEEDS_PRODUCTS_LIST else 200
170-
self.setStatus(status_code, results)
138+
self.setStatus(results.status_code, results)
171139
if (
172140
self.kwargs.get("format", "json") == "json"
173141
and self.product not in FEEDS_PRODUCTS_LIST # Special handling of feeds products' data to preserve the result in jsonline format
174142
):
175143
self._data = results.json()
176-
elif self.product in FEEDS_PRODUCTS_LIST:
177-
self._data = results # Uses generator to handle large data results from feeds endpoint
178144
else:
179145
self._data = results.text
180146
limit_exceeded, message = self.check_limit_exceeded()
@@ -189,10 +155,6 @@ def data(self):
189155
return self._data
190156

191157
def check_limit_exceeded(self):
192-
if self.product in FEEDS_PRODUCTS_LIST:
193-
# bypass here as this is handled in generator already
194-
return False, ""
195-
196158
if self.kwargs.get("format", "json") == "json" and self.product not in FEEDS_PRODUCTS_LIST:
197159
if "response" in self._data and "limit_exceeded" in self._data["response"] and self._data["response"]["limit_exceeded"] is True:
198160
return True, self._data["response"]["message"]
@@ -210,7 +172,7 @@ def status(self):
210172

211173
def setStatus(self, code, response=None):
212174
self._status = code
213-
if code == 200 or (self.product in FEEDS_PRODUCTS_LIST and code == 206):
175+
if code == 200:
214176
return
215177

216178
reason = None
@@ -249,7 +211,7 @@ def response(self):
249211
return self._response
250212

251213
def items(self):
252-
return self.response().items() if isinstance(self.response(), dict) else self.response()
214+
return self.response().items()
253215

254216
def emails(self):
255217
"""Find and returns all emails mentioned in the response"""

domaintools/cli/api.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from rich.progress import Progress, SpinnerColumn, TextColumn
1010

1111
from domaintools.api import API
12-
from domaintools.constants import Endpoint, OutputFormat, FEEDS_PRODUCTS_LIST
12+
from domaintools.constants import Endpoint, OutputFormat
1313
from domaintools.cli.utils import get_file_extension
1414
from domaintools.exceptions import ServiceException
1515
from domaintools._version import current as version
@@ -110,9 +110,6 @@ def args_to_dict(*args) -> Dict:
110110
def _get_formatted_output(cls, cmd_name: str, response, out_format: str = "json"):
111111
if cmd_name in ("available_api_calls",):
112112
return "\n".join(response)
113-
if response.product in FEEDS_PRODUCTS_LIST:
114-
return "\n".join([data.text for data in response])
115-
116113
return str(getattr(response, out_format) if out_format != "list" else response.as_list())
117114

118115
@classmethod
@@ -230,10 +227,7 @@ def run(cls, name: str, params: Optional[Dict] = {}, **kwargs):
230227

231228
if isinstance(out_file, _io.TextIOWrapper):
232229
# use rich `print` command to prettify the output in sys.stdout
233-
if response.product in FEEDS_PRODUCTS_LIST:
234-
print(output)
235-
else:
236-
print(response)
230+
print(response)
237231
else:
238232
# if it's a file then write
239233
out_file.write(output if output.endswith("\n") else output + "\n")

domaintools_async/__init__.py

Lines changed: 20 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22

33
import asyncio
44

5+
from copy import deepcopy
56
from httpx import AsyncClient
67

78
from domaintools.base_results import Results
8-
from domaintools.constants import FEEDS_PRODUCTS_LIST
9-
from domaintools.exceptions import ServiceUnavailableException, ServiceException
9+
from domaintools.constants import FEEDS_PRODUCTS_LIST, OutputFormat, HEADER_ACCEPT_KEY_CSV_FORMAT
10+
from domaintools.exceptions import ServiceUnavailableException
1011

1112

1213
class _AIter(object):
@@ -41,26 +42,6 @@ class AsyncResults(Results):
4142
def __await__(self):
4243
return self.__awaitable__().__await__()
4344

44-
async def _get_feeds_async_results_generator(self, session, parameters, headers):
45-
status_code = None
46-
while status_code != 200:
47-
resp_data = await session.get(url=self.url, params=parameters, headers=headers, **self.api.extra_request_params)
48-
status_code = resp_data.status_code
49-
self.setStatus(status_code, resp_data)
50-
51-
# Check limit exceeded here
52-
if "response" in resp_data.text and "limit_exceeded" in resp_data.text:
53-
self._limit_exceeded = True
54-
self._limit_exceeded_message = "limit exceeded"
55-
yield resp_data
56-
57-
if self._limit_exceeded:
58-
raise ServiceException(503, "Limit Exceeded{}".format(self._limit_exceeded_message))
59-
if not self.kwargs.get("sessionID"):
60-
# We only make iterative requests for queries that have a sessionID.
61-
# Otherwise, the requests would loop indefinitely when no sessionID is provided but the requested data exceeds the maximum (1 hour of data).
62-
break
63-
6445
async def _make_async_request(self, session):
6546
if self.product in ["iris-investigate", "iris-enrich", "iris-detect-escalate-domains"]:
6647
post_data = self.kwargs.copy()
@@ -71,19 +52,27 @@ async def _make_async_request(self, session):
7152
patch_data.update(self.api.extra_request_params)
7253
results = await session.patch(url=self.url, json=patch_data)
7354
elif self.product in FEEDS_PRODUCTS_LIST:
74-
generator_params = self._get_session_params()
75-
parameters = generator_params.get("parameters")
76-
headers = generator_params.get("headers")
77-
results = await self._get_feeds_async_results_generator(session=session, parameters=parameters, headers=headers)
55+
parameters = deepcopy(self.kwargs)
56+
parameters.pop("output_format", None)
57+
parameters.pop(
58+
"format", None
59+
) # For some unknown reason, "format" is populated for the feeds endpoint even when it is not included in the CLI params, so we need to remove it. This happens only when using the CLI.
60+
headers = {}
61+
if self.kwargs.get("output_format", OutputFormat.JSONL.value) == OutputFormat.CSV.value:
62+
parameters["headers"] = int(bool(self.kwargs.get("headers", False)))
63+
headers["accept"] = HEADER_ACCEPT_KEY_CSV_FORMAT
64+
65+
header_api_key = parameters.pop("X-Api-Key", None)
66+
if header_api_key:
67+
headers["X-Api-Key"] = header_api_key
68+
69+
results = await session.get(url=self.url, params=parameters, headers=headers, **self.api.extra_request_params)
7870
else:
7971
results = await session.get(url=self.url, params=self.kwargs, **self.api.extra_request_params)
8072
if results:
81-
status_code = results.status_code if self.product not in FEEDS_PRODUCTS_LIST else 200
82-
self.setStatus(status_code, results)
73+
self.setStatus(results.status_code, results)
8374
if self.kwargs.get("format", "json") == "json":
8475
self._data = results.json()
85-
elif self.product in FEEDS_PRODUCTS_LIST:
86-
self._data = results # Uses generator to handle large data results from feeds endpoint
8776
else:
8877
self._data = results.text()
8978
limit_exceeded, message = self.check_limit_exceeded()
@@ -94,6 +83,7 @@ async def _make_async_request(self, session):
9483

9584
async def __awaitable__(self):
9685
if self._data is None:
86+
9787
async with AsyncClient(verify=self.api.verify_ssl, proxy=self.api.proxy_url, timeout=None) as session:
9888
wait_time = self._wait_time()
9989
if wait_time is None and self.api:

0 commit comments

Comments
 (0)