DomainTools
diff --git a/‎README.md‎
Lines changed: 59 additions & 1 deletion b/‎README.md‎
Lines changed: 59 additions & 1 deletion
diff --git a/‎VERSION‎
Lines changed: 1 addition & 1 deletion b/‎VERSION‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎domaintools/_version.py‎
Lines changed: 1 addition & 1 deletion b/‎domaintools/_version.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎domaintools/api.py‎
Lines changed: 65 additions & 22 deletions b/‎domaintools/api.py‎
Lines changed: 65 additions & 22 deletions
diff --git a/‎domaintools/base_results.py‎
Lines changed: 67 additions & 26 deletions b/‎domaintools/base_results.py‎
Lines changed: 67 additions & 26 deletions
@@ -211,5 +211,63 @@ API_KEY
 Python Version Support Policy
 ===================
 
-Please see the [supported versions](https://github.com/DomainTools/python_api/raw/main/PYTHON_SUPPORT.md) document 
+Please see the [supported versions](https://github.com/DomainTools/python_api/raw/main/PYTHON_SUPPORT.md) document
 for the DomainTools Python support policy.
+
+
+Real-Time Threat Intelligence Feeds
+===================
+
+Real-Time Threat Intelligence Feeds provide data on the different stages of the domain lifecycle: from first-observed in the wild, to newly re-activated after a period of quiet. Access current feed data in real-time or retrieve historical feed data through separate APIs.
+
+Custom parameters aside from the common `GET` Request parameters:
+- `endpoint` (choose either `download` or `feed` API endpoint - default is `feed`)
+    ```python
+    api = API(USERNAME, KEY, always_sign_api_key=False)
+    api.nod(endpoint="feed", **kwargs)
+    ```
+- `header_authentication`: API header authentication is used by default. Set this to `False` if you want to use API key and secret authentication instead. API header authentication cannot be used for `download` endpoints, so this defaults to `False` for them even without explicitly setting it.
+    ```python
+    api = API(USERNAME, KEY, always_sign_api_key=False)
+    api.nod(header_authentication=False, **kwargs)
+    ```
+- `output_format`: (choose either `csv` or `jsonl` - default is `jsonl`). Cannot be used in `domainrdap` feeds. Additionally, `csv` is not available for `download` endpoints.
+    ```python
+    api = API(USERNAME, KEY, always_sign_api_key=False)
+    api.nod(output_format="csv", **kwargs)
+    ```
+
+The Feed API standard access pattern is to periodically request the most recent feed data, as often as every 60 seconds. Specify the range of data you receive in one of two ways:
+
+1. With `sessionID`: Make a call and provide a new `sessionID` parameter of your choosing. The API will return the last hour of data by default.
+    - Each subsequent call to the API using your `sessionID` will return all data since the last.
+    - Any single request returns a maximum of 10M results. Requests that exceed 10M results will return an HTTP 206 response code; repeat the same request (with the same `sessionID`) to receive the next tranche of data until receiving an HTTP 200 response code.
+2. Or, specify the time range in one of two ways:
+    - Either an `after=-60` query parameter, where (in this example) -60 indicates the previous 60 seconds.
+    - Or `after` and `before` query parameters for a time range, with each parameter accepting an ISO-8601 UTC formatted timestamp (a UTC date and time of the format YYYY-MM-DDThh:mm:ssZ)
+
+## Handling iterative response from RTUF endpoints:
+
+Since we may be dealing with large feed datasets, the Python wrapper uses a `generator` for efficient memory handling. Therefore, you need to iterate through the `generator` to access the partial results of the feed data.
+
+### Single request because the requested data is within the maximum result:
+```python
+from domaintools import API
+
+api = API(USERNAME, KEY, always_sign_api_key=False)
+results = api.nod(sessionID="my-session-id", after=-60)
+
+for result in results.response():  # generator that holds NOD feeds data for the past 60 seconds and is expected to request only once
+    # do things to result
+```
+
+### Multiple requests because the requested data is more than the maximum result per request:
+```python
+from domaintools import API
+
+api = API(USERNAME, KEY, always_sign_api_key=False)
+results = api.nod(sessionID="my-session-id", after=-7200)
+
+for partial_result in results.response():  # generator that holds NOD feeds data for the past 2 hours and is expected to request multiple times
+    # do things to partial_result
+```
@@ -1 +1 @@
-2.2.0
+2.3.0
@@ -20,4 +20,4 @@
 
 """
 
-current = "2.2.0"
+current = "2.3.0"
@@ -1,15 +1,18 @@
 from datetime import datetime, timedelta, timezone
 from hashlib import sha1, sha256, md5
 from hmac import new as hmac
+
 import re
 
+from domaintools.constants import Endpoint, ENDPOINT_TO_SOURCE_MAP, FEEDS_PRODUCTS_LIST, OutputFormat
 from domaintools._version import current as version
 from domaintools.results import (
     GroupedIterable,
     ParsedWhois,
     ParsedDomainRdap,
     Reputation,
     Results,
+    FeedsResults,
 )
 from domaintools.filters import (
     filter_by_riskscore,
@@ -18,6 +21,8 @@
     filter_by_field,
     DTResultFilter,
 )
+from domaintools.utils import validate_feeds_parameters
+
 
 AVAILABLE_KEY_SIGN_HASHES = ["sha1", "sha256", "md5"]
 
@@ -84,11 +89,8 @@ def __init__(
 
         if not https:
             raise Exception("The DomainTools API endpoints no longer support http traffic. Please make sure https=True.")
-        if proxy_url:
-            if isinstance(proxy_url, str):
-                self.proxy_url = {"http://": proxy_url, "https://": proxy_url}
-            else:
-                raise Exception("Proxy URL must be a string. For example: '127.0.0.1:8888'")
+        if proxy_url and not isinstance(proxy_url, str):
+            raise Exception("Proxy URL must be a string. For example: '127.0.0.1:8888'")
 
     def _build_api_url(self, api_url=None, api_port=None):
         """Build the API url based on the given url and port. Defaults to `https://api.domaintools.com`"""
@@ -122,14 +124,18 @@ def _results(self, product, path, cls=Results, **kwargs):
         uri = "/".join((self._rest_api_url, path.lstrip("/")))
         parameters = self.default_parameters.copy()
         parameters["api_username"] = self.username
-        self.handle_api_key(path, parameters)
+        header_authentication = kwargs.pop("header_authentication", True)  # Used only by Real-Time Threat Intelligence Feeds endpoints for now
+        self.handle_api_key(product, path, parameters, header_authentication)
         parameters.update({key: str(value).lower() if value in (True, False) else value for key, value in kwargs.items() if value is not None})
 
         return cls(self, product, uri, **parameters)
 
-    def handle_api_key(self, path, parameters):
+    def handle_api_key(self, product, path, parameters, header_authentication):
         if self.https and not self.always_sign_api_key:
-            parameters["api_key"] = self.key
+            if product in FEEDS_PRODUCTS_LIST and header_authentication:
+                parameters["X-Api-Key"] = self.key
+            else:
+                parameters["api_key"] = self.key
         else:
             if self.key_sign_hash and self.key_sign_hash in AVAILABLE_KEY_SIGN_HASHES:
                 signing_hash = eval(self.key_sign_hash)
@@ -1058,30 +1064,67 @@ def iris_detect_ignored_domains(
             **kwargs,
         )
 
-    def nod(self, **kwargs):
+    def nod(self, **kwargs) -> FeedsResults:
         """Returns back list of the newly observed domains feed"""
-        sessionID = kwargs.get("sessionID")
-        after = kwargs.get("after")
-        if not (sessionID or after):
-            raise ValueError("sessionID or after (can be both) must be defined")
+        validate_feeds_parameters(kwargs)
+        endpoint = kwargs.pop("endpoint", Endpoint.FEED.value)
+        source = ENDPOINT_TO_SOURCE_MAP.get(endpoint)
+        if endpoint == Endpoint.DOWNLOAD.value or kwargs.get("output_format", OutputFormat.JSONL.value) != OutputFormat.CSV.value:
+            # headers param is allowed only in Feed API and CSV format
+            kwargs.pop("headers", None)
 
         return self._results(
-            "newly-observed-domains-feed-(api)",
-            "v1/feed/nod/",
+            f"newly-observed-domains-feed-({source.value})",
+            f"v1/{endpoint}/nod/",
             response_path=(),
+            cls=FeedsResults,
             **kwargs,
         )
 
-    def nad(self, **kwargs):
+    def nad(self, **kwargs) -> FeedsResults:
         """Returns back list of the newly active domains feed"""
-        sessionID = kwargs.get("sessionID")
-        after = kwargs.get("after")
-        if not (sessionID or after):
-            raise ValueError("sessionID or after (can be both) must be defined")
+        validate_feeds_parameters(kwargs)
+        endpoint = kwargs.pop("endpoint", Endpoint.FEED.value)
+        source = ENDPOINT_TO_SOURCE_MAP.get(endpoint).value
+        if endpoint == Endpoint.DOWNLOAD.value or kwargs.get("output_format", OutputFormat.JSONL.value) != OutputFormat.CSV.value:
+            # headers param is allowed only in Feed API and CSV format
+            kwargs.pop("headers", None)
+
+        return self._results(
+            f"newly-active-domains-feed-({source})",
+            f"v1/{endpoint}/nad/",
+            response_path=(),
+            cls=FeedsResults,
+            **kwargs,
+        )
+
+    def domainrdap(self, **kwargs) -> FeedsResults:
+        """Returns changes to global domain registration information, populated by the Registration Data Access Protocol (RDAP)"""
+        validate_feeds_parameters(kwargs)
+        endpoint = kwargs.pop("endpoint", Endpoint.FEED.value)
+        source = ENDPOINT_TO_SOURCE_MAP.get(endpoint).value
+
+        return self._results(
+            f"domain-registration-data-access-protocol-feed-({source})",
+            f"v1/{endpoint}/domainrdap/",
+            response_path=(),
+            cls=FeedsResults,
+            **kwargs,
+        )
+
+    def domaindiscovery(self, **kwargs) -> FeedsResults:
+        """Returns new domains as they are either discovered in domain registration information, observed by our global sensor network, or reported by trusted third parties"""
+        validate_feeds_parameters(kwargs)
+        endpoint = kwargs.pop("endpoint", Endpoint.FEED.value)
+        source = ENDPOINT_TO_SOURCE_MAP.get(endpoint).value
+        if endpoint == Endpoint.DOWNLOAD.value or kwargs.get("output_format", OutputFormat.JSONL.value) != OutputFormat.CSV.value:
+            # headers param is allowed only in Feed API and CSV format
+            kwargs.pop("headers", None)
 
         return self._results(
-            "newly-active-domains-feed-(api)",
-            "v1/feed/nad/",
+            f"real-time-domain-discovery-feed-({source})",
+            f"v1/{endpoint}/domaindiscovery/",
             response_path=(),
+            cls=FeedsResults,
             **kwargs,
         )
@@ -4,8 +4,12 @@
 import re
 import time
 import logging
+
+from copy import deepcopy
 from datetime import datetime
+from httpx import Client
 
+from domaintools.constants import FEEDS_PRODUCTS_LIST, OutputFormat, HEADER_ACCEPT_KEY_CSV_FORMAT
 from domaintools.exceptions import (
     BadRequestException,
     InternalServerErrorException,
@@ -16,9 +20,7 @@
     IncompleteResponseException,
     RequestUriTooLongException,
 )
-from domaintools.utils import get_feeds_products_list
 
-from httpx import Client
 
 try:  # pragma: no cover
     from collections.abc import MutableMapping, MutableSequence
@@ -51,8 +53,6 @@ def __init__(
         self._response = None
         self._items_list = None
         self._data = None
-        self._limit_exceeded = None
-        self._limit_exceeded_message = None
 
     def _wait_time(self):
         if not self.api.rate_limit or not self.product in self.api.limits:
@@ -75,6 +75,23 @@ def _wait_time(self):
 
         return wait_for
 
+    def _get_session_params(self):
+        parameters = deepcopy(self.kwargs)
+        parameters.pop("output_format", None)
+        parameters.pop(
+            "format", None
+        )  # For some unknown reason, even if "format" is not included in the CLI params for feeds endpoints, it is being populated, so we need to remove it. Happens only when using the CLI.
+        headers = {}
+        if self.kwargs.get("output_format", OutputFormat.JSONL.value) == OutputFormat.CSV.value:
+            parameters["headers"] = int(bool(self.kwargs.get("headers", False)))
+            headers["accept"] = HEADER_ACCEPT_KEY_CSV_FORMAT
+
+        header_api_key = parameters.pop("X-Api-Key", None)
+        if header_api_key:
+            headers["X-Api-Key"] = header_api_key
+
+        return {"parameters": parameters, "headers": headers}
+
     def _make_request(self):
 
         with Client(verify=self.api.verify_ssl, proxy=self.api.proxy_url, timeout=None) as session:
@@ -90,6 +107,11 @@ def _make_request(self):
                 patch_data = self.kwargs.copy()
                 patch_data.update(self.api.extra_request_params)
                 return session.patch(url=self.url, json=patch_data)
+            elif self.product in FEEDS_PRODUCTS_LIST:
+                session_params = self._get_session_params()
+                parameters = session_params.get("parameters")
+                headers = session_params.get("headers")
+                return session.get(url=self.url, params=parameters, headers=headers, **self.api.extra_request_params)
             else:
                 return session.get(url=self.url, params=self.kwargs, **self.api.extra_request_params)
 
@@ -118,33 +140,26 @@ def data(self):
         if self._data is None:
             results = self._get_results()
             self.setStatus(results.status_code, results)
-            if (
-                self.kwargs.get("format", "json") == "json"
-                and self.product
-                not in get_feeds_products_list()  # Special handling of feeds products' data to preserve the result in jsonline format
-            ):
+            if self.kwargs.get("format", "json") == "json":
                 self._data = results.json()
             else:
                 self._data = results.text
-            limit_exceeded, message = self.check_limit_exceeded()
 
-            if limit_exceeded:
-                self._limit_exceeded = True
-                self._limit_exceeded_message = message
+        self.check_limit_exceeded()
 
-        if self._limit_exceeded is True:
-            raise ServiceException(503, "Limit Exceeded{}".format(self._limit_exceeded_message))
-        else:
-            return self._data
+        return self._data
 
     def check_limit_exceeded(self):
-        if self.kwargs.get("format", "json") == "json":
-            if "response" in self._data and "limit_exceeded" in self._data["response"] and self._data["response"]["limit_exceeded"] is True:
-                return True, self._data["response"]["message"]
-        # TODO: handle html, xml response errors better.
+        limit_exceeded, reason = False, ""
+        if isinstance(self._data, dict) and (
+            "response" in self._data and "limit_exceeded" in self._data["response"] and self._data["response"]["limit_exceeded"] is True
+        ):
+            limit_exceeded, reason = True, self._data["response"]["message"]
         elif "response" in self._data and "limit_exceeded" in self._data:
-            return True, "limit exceeded"
-        return False, ""
+            limit_exceeded = True
+
+        if limit_exceeded:
+            raise ServiceException(503, f"Limit Exceeded {reason}")
 
     @property
     def status(self):
@@ -155,7 +170,7 @@ def status(self):
 
     def setStatus(self, code, response=None):
         self._status = code
-        if code == 200:
+        if code == 200 or (self.product in FEEDS_PRODUCTS_LIST and code == 206):
             return
 
         reason = None
@@ -167,9 +182,9 @@ def setStatus(self, code, response=None):
                 if callable(reason):
                     reason = reason()
 
-        if code == 400:
+        if code in (400, 422):
             raise BadRequestException(code, reason)
-        elif code == 403:
+        elif code in (401, 403):
             raise NotAuthorizedException(code, reason)
         elif code == 404:
             raise NotFoundException(code, reason)
@@ -259,6 +274,32 @@ def json(self):
             **self.kwargs,
         )
 
+    @property
+    def jsonl(self):
+        self.kwargs.pop("format", None)
+        return self.__class__(
+            format="jsonl",
+            product=self.product,
+            url=self.url,
+            items_path=self.items_path,
+            response_path=self.response_path,
+            api=self.api,
+            **self.kwargs,
+        )
+
+    @property
+    def csv(self):
+        self.kwargs.pop("format", None)
+        return self.__class__(
+            format="csv",
+            product=self.product,
+            url=self.url,
+            items_path=self.items_path,
+            response_path=self.response_path,
+            api=self.api,
+            **self.kwargs,
+        )
+
     @property
     def xml(self):
         self.kwargs.pop("format", None)
Original file line number	Diff line number	Diff line change
`@@ -20,4 +20,4 @@`
`20`	`20`
`21`	`21`	`"""`
`22`	`22`
`23`		`-current = "2.2.0"`
	`23`	`+current = "2.3.0"`