Skip to content

Commit 901fcaf

Browse files
committed
support scaling/hard timeouts & graceful-fail for sync batch
1 parent 14d1ceb commit 901fcaf

File tree

4 files changed

+117
-34
lines changed

4 files changed

+117
-34
lines changed

ipinfo/exceptions.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,9 @@ class RequestQuotaExceededError(Exception):
77
"""Error indicating that the user's monthly request quota has been passed."""
88

99
pass
10+
11+
12+
class TimeoutExceededError(Exception):
13+
"""Error indicating that some timeout has been exceeded."""
14+
15+
pass

ipinfo/handler.py

Lines changed: 82 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,22 @@
66
import json
77
import os
88
import sys
9+
import time
910

1011
import requests
1112

1213
from .cache.default import DefaultCache
1314
from .details import Details
14-
from .exceptions import RequestQuotaExceededError
15+
from .exceptions import RequestQuotaExceededError, TimeoutExceededError
16+
from .handler_utils import (
17+
API_URL,
18+
COUNTRY_FILE_DEFAULT,
19+
BATCH_MAX_SIZE,
20+
CACHE_MAXSIZE,
21+
CACHE_TTL,
22+
REQUEST_TIMEOUT_DEFAULT,
23+
BATCH_REQ_TIMEOUT_DEFAULT,
24+
)
1525
from . import handler_utils
1626

1727

@@ -21,10 +31,6 @@ class Handler:
2131
Instantiates and maintains access to cache.
2232
"""
2333

24-
CACHE_MAXSIZE = 4096
25-
CACHE_TTL = 60 * 60 * 24
26-
REQUEST_TIMEOUT_DEFAULT = 2
27-
2834
def __init__(self, access_token=None, **kwargs):
2935
"""
3036
Initialize the Handler object with country name list and the
@@ -40,21 +46,26 @@ def __init__(self, access_token=None, **kwargs):
4046
# setup req opts
4147
self.request_options = kwargs.get("request_options", {})
4248
if "timeout" not in self.request_options:
43-
self.request_options["timeout"] = self.REQUEST_TIMEOUT_DEFAULT
49+
self.request_options["timeout"] = REQUEST_TIMEOUT_DEFAULT
4450

4551
# setup cache
4652
if "cache" in kwargs:
4753
self.cache = kwargs["cache"]
4854
else:
4955
cache_options = kwargs.get("cache_options", {})
5056
if "maxsize" not in cache_options:
51-
cache_options["maxsize"] = self.CACHE_MAXSIZE
57+
cache_options["maxsize"] = CACHE_MAXSIZE
5258
if "ttl" not in cache_options:
53-
cache_options["ttl"] = self.CACHE_TTL
59+
cache_options["ttl"] = CACHE_TTL
5460
self.cache = DefaultCache(**cache_options)
5561

56-
def getDetails(self, ip_address=None):
57-
"""Get details for specified IP address as a Details object."""
62+
def getDetails(self, ip_address=None, timeout=None):
63+
"""
64+
Get details for specified IP address as a Details object.
65+
66+
If `timeout` is not `None`, it will override the client-level timeout
67+
just for this operation.
68+
"""
5869
# If the supplied IP address uses the objects defined in the built-in
5970
# module ipaddress extract the appropriate string notation before
6071
# formatting the URL.
@@ -66,12 +77,17 @@ def getDetails(self, ip_address=None):
6677
if ip_address in self.cache:
6778
return Details(self.cache[ip_address])
6879

80+
# prepare req http opts
81+
req_opts = {**self.request_options}
82+
if timeout is not None:
83+
req_opts["timeout"] = timeout
84+
6985
# not in cache; do http req
70-
url = handler_utils.API_URL
86+
url = API_URL
7187
if ip_address:
7288
url += "/" + ip_address
7389
headers = handler_utils.get_headers(self.access_token)
74-
response = requests.get(url, headers=headers, **self.request_options)
90+
response = requests.get(url, headers=headers, **req_opts)
7591
if response.status_code == 429:
7692
raise RequestQuotaExceededError()
7793
response.raise_for_status()
@@ -83,7 +99,14 @@ def getDetails(self, ip_address=None):
8399

84100
return Details(details)
85101

86-
def getBatchDetails(self, ip_addresses, batch_size=None):
102+
def getBatchDetails(
103+
self,
104+
ip_addresses,
105+
batch_size=None,
106+
timeout_per_batch=BATCH_REQ_TIMEOUT_DEFAULT,
107+
timeout_total=None,
108+
raise_on_fail=True,
109+
):
87110
"""
88111
Get details for a batch of IP addresses at once.
89112
@@ -92,11 +115,26 @@ def getBatchDetails(self, ip_addresses, batch_size=None):
92115
all of the response data, which is at least a magnitude larger than the
93116
input list).
94117
118+
The input list is broken up into batches to abide by API requirements.
95119
The batch size can be adjusted with `batch_size` but is clipped to (and
96-
also defaults to) `handler_utils.BATCH_MAX_SIZE`.
120+
also defaults to) `BATCH_MAX_SIZE`.
121+
122+
For each batch, `timeout_per_batch` indicates the maximum seconds to
123+
spend waiting for the HTTP request to complete. If any batch fails with
124+
this timeout, the whole operation fails.
125+
Defaults to `BATCH_REQ_TIMEOUT_DEFAULT` seconds.
126+
127+
`timeout_total` is a seconds-denominated hard-timeout for the time
128+
spent in HTTP operations; regardless of whether all batches have
129+
succeeded so far, if `timeout_total` is reached, the whole operation
130+
will fail. Defaults to being turned off.
131+
132+
`raise_on_fail`, if turned off, will return any result retrieved so far
133+
rather than raise an exception when errors occur, including timeout and
134+
quota errors. Defaults to on.
97135
"""
98136
if batch_size == None:
99-
batch_size = handler_utils.BATCH_MAX_SIZE
137+
batch_size = BATCH_MAX_SIZE
100138

101139
result = {}
102140

@@ -117,23 +155,44 @@ def getBatchDetails(self, ip_addresses, batch_size=None):
117155
else:
118156
lookup_addresses.append(ip_address)
119157

158+
# prepare req http options
159+
req_opts = {**self.request_options, "timeout": timeout_per_batch}
160+
161+
if timeout_total is not None:
162+
start_time = time.time()
163+
120164
# loop over batch chunks and do lookup for each.
121165
for i in range(0, len(ip_addresses), batch_size):
166+
# quit if total timeout is reached.
167+
if (
168+
timeout_total is not None
169+
and time.time() - start_time > timeout_total
170+
):
171+
if raise_on_fail:
172+
raise TimeoutExceededError()
173+
else:
174+
return result
175+
122176
chunk = ip_addresses[i : i + batch_size]
123177

124178
# lookup
125-
url = handler_utils.API_URL + "/batch"
179+
url = API_URL + "/batch"
126180
headers = handler_utils.get_headers(self.access_token)
127181
headers["content-type"] = "application/json"
128182
response = requests.post(
129-
url,
130-
json=lookup_addresses,
131-
headers=headers,
132-
**self.request_options
183+
url, json=lookup_addresses, headers=headers, **req_opts
133184
)
134-
if response.status_code == 429:
135-
raise RequestQuotaExceededError()
136-
response.raise_for_status()
185+
186+
# fail on bad status codes
187+
try:
188+
if response.status_code == 429:
189+
raise RequestQuotaExceededError()
190+
response.raise_for_status()
191+
except Exception as e:
192+
if raise_on_fail:
193+
raise e
194+
else:
195+
return result
137196

138197
# fill cache
139198
json_response = response.json()

ipinfo/handler_async.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@
1313
from .cache.default import DefaultCache
1414
from .details import Details
1515
from .exceptions import RequestQuotaExceededError
16+
from .handler_utils import (
17+
API_URL,
18+
COUNTRY_FILE_DEFAULT,
19+
BATCH_MAX_SIZE,
20+
CACHE_MAXSIZE,
21+
CACHE_TTL,
22+
REQUEST_TIMEOUT_DEFAULT,
23+
BATCH_REQ_TIMEOUT_DEFAULT,
24+
)
1625
from . import handler_utils
1726

1827

@@ -22,10 +31,6 @@ class AsyncHandler:
2231
Instantiates and maintains access to cache.
2332
"""
2433

25-
CACHE_MAXSIZE = 4096
26-
CACHE_TTL = 60 * 60 * 24
27-
REQUEST_TIMEOUT_DEFAULT = 2
28-
2934
def __init__(self, access_token=None, **kwargs):
3035
"""
3136
Initialize the Handler object with country name list and the
@@ -41,7 +46,7 @@ def __init__(self, access_token=None, **kwargs):
4146
# setup req opts
4247
self.request_options = kwargs.get("request_options", {})
4348
if "timeout" not in self.request_options:
44-
self.request_options["timeout"] = self.REQUEST_TIMEOUT_DEFAULT
49+
self.request_options["timeout"] = REQUEST_TIMEOUT_DEFAULT
4550

4651
# setup aiohttp
4752
self.httpsess = None
@@ -52,9 +57,9 @@ def __init__(self, access_token=None, **kwargs):
5257
else:
5358
cache_options = kwargs.get("cache_options", {})
5459
if "maxsize" not in cache_options:
55-
cache_options["maxsize"] = self.CACHE_MAXSIZE
60+
cache_options["maxsize"] = CACHE_MAXSIZE
5661
if "ttl" not in cache_options:
57-
cache_options["ttl"] = self.CACHE_TTL
62+
cache_options["ttl"] = CACHE_TTL
5863
self.cache = DefaultCache(**cache_options)
5964

6065
async def init(self):
@@ -97,7 +102,7 @@ async def getDetails(self, ip_address=None):
97102
return Details(self.cache[ip_address])
98103

99104
# not in cache; do http req
100-
url = handler_utils.API_URL
105+
url = API_URL
101106
if ip_address:
102107
url += "/" + ip_address
103108
headers = handler_utils.get_headers(self.access_token)
@@ -122,16 +127,17 @@ async def getBatchDetails(self, ip_addresses, batch_size=None):
122127
all of the response data, which is at least a magnitude larger than the
123128
input list).
124129
130+
The input list is broken up into batches to abide by API requirements.
125131
The batch size can be adjusted with `batch_size` but is clipped to (and
126-
also defaults to) `handler_utils.BATCH_MAX_SIZE`.
132+
also defaults to) `BATCH_MAX_SIZE`.
127133
128134
The concurrency level is currently unadjustable; coroutines will be
129135
created and consumed for all batches at once.
130136
"""
131137
self._ensure_aiohttp_ready()
132138

133139
if batch_size == None:
134-
batch_size = handler_utils.BATCH_MAX_SIZE
140+
batch_size = BATCH_MAX_SIZE
135141

136142
result = {}
137143

@@ -162,7 +168,7 @@ async def getBatchDetails(self, ip_addresses, batch_size=None):
162168
return result
163169

164170
# do http req
165-
url = handler_utils.API_URL + "/batch"
171+
url = API_URL + "/batch"
166172
headers = handler_utils.get_headers(self.access_token)
167173
headers["content-type"] = "application/json"
168174
reqs.append(

ipinfo/handler_utils.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,18 @@
1818
# The max amount of IPs allowed by the API per batch request.
1919
BATCH_MAX_SIZE = 1000
2020

21+
# The default max size of the cache in terms of number of items.
22+
CACHE_MAXSIZE = 4096
23+
24+
# The default TTL of the cache in seconds.
25+
CACHE_TTL = 60 * 60 * 24
26+
27+
# The default request timeout for per-IP requests.
28+
REQUEST_TIMEOUT_DEFAULT = 2
29+
30+
# The default request timeout for batch requests.
31+
BATCH_REQ_TIMEOUT_DEFAULT = 5
32+
2133

2234
def get_headers(access_token):
2335
"""Build headers for request to IPinfo API."""

0 commit comments

Comments
 (0)