Commit e229f52

Merge pull request #37 from ipinfo/uman/better-batch-ops
Improved batch processing
2 parents 1b98867 + 6f36516 commit e229f52

File tree

12 files changed: +517 additions, -272 deletions


CHANGELOG.md

Lines changed: 24 additions & 2 deletions
@@ -1,12 +1,34 @@
 # IPInfo Changelog
 
+## 4.1.0
+
+- The SDK version is available via `ipinfo.version` as `SDK_VERSION`.
+- Most private functions on all handlers (i.e. those that start with `_`) are
+  now moved to `ipinfo.handler_utils`.
+- All constants that existed on handlers (i.e. `REQUEST_TIMEOUT_DEFAULT`) are
+  now moved to `ipinfo.handler_utils`.
+- Cache behavior for the synchronous handler is a bit different now; the item
+  actually cached is the item _after_ formatting is complete, rather than
+  before.
+- Both the sync and async handlers have the following improvements:
+  - `timeout` can be specified as a keyword-arg to getDetails to optionally
+    override the client-level timeout.
+  - getBatchDetails now has no limit to the size of the `ip_addresses` input
+    list. It will chunk the list internally and make requests against the
+    batch endpoint in a way that doesn't exceed the API's own limits.
+  - getBatchDetails now accepts the new options `batch_size`,
+    `timeout_per_batch`, `timeout_total` and `raise_on_fail`. Please see the
+    documentation for details on what each of these do.
+
 ## 4.0.0
 
 #### Breaking Changes
 
 - [PR #32](https://github.com/ipinfo/python/pull/32)
-  All EOL Python versions are no longer supported; currently, Python 3.6 or greater is now **required**.
-  An asynchronous handler is available from `getHandlerAsync` which returns an `AsyncHandler` which uses **aiohttp**.
+  All EOL Python versions are no longer supported; currently, Python 3.6 or
+  greater is now **required**.
+  An asynchronous handler is available from `getHandlerAsync` which returns an
+  `AsyncHandler` which uses **aiohttp**.
 
 ## 3.0.0
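
As a usage sketch of the 4.1.0 entries above (not taken from the repository itself): it assumes the usual `ipinfo.getHandler` entry point, a placeholder token, and attribute access on the returned `Details` object.

```python
import ipinfo

handler = ipinfo.getHandler("MY_TOKEN")  # placeholder token

# Per-call timeout (in seconds), overriding the client-level default
# for this one lookup only.
details = handler.getDetails("8.8.8.8", timeout=5)
print(details.country_name)

# Batch lookup using the new 4.1.0 options described above.
results = handler.getBatchDetails(
    ["1.1.1.1", "8.8.8.8", "9.9.9.9"],
    batch_size=100,        # clipped to BATCH_MAX_SIZE internally
    timeout_per_batch=5,   # per-HTTP-request timeout, in seconds
    timeout_total=60,      # hard cap on total time spent in HTTP calls
    raise_on_fail=False,   # return partial results instead of raising
)
print(results["8.8.8.8"]["country_name"])
```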

README.md

Lines changed: 7 additions & 3 deletions
@@ -218,9 +218,10 @@ The file must be a `.json` file with the following structure:
 
 ### Batch Operations
 
-Looking up a single IP at a time can be slow. It could be done concurrently from
-the client side, but IPinfo supports a batch endpoint to allow you to group
-together IPs and let us handle retrieving details for them in bulk for you.
+Looking up a single IP at a time can be slow. It could be done concurrently
+from the client side, but IPinfo supports a batch endpoint to allow you to
+group together IPs and let us handle retrieving details for them in bulk for
+you.
 
 ```python
 >>> import ipinfo, pprint
@@ -256,6 +257,9 @@ together IPs and let us handle retrieving details for them in bulk for you.
     'timezone': 'America/Los_Angeles'}}
 ```
 
+The input size is not limited, as the interface will chunk operations for you
+behind the scenes.
+
 Please see [the official documentation](https://ipinfo.io/developers/batch) for
 more information and limitations.
 
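
A hedged sketch of what the unlimited input size means in practice; the token and the address list are placeholders, and `raise_on_fail=False` is passed so partial results come back instead of an exception.

```python
import ipinfo

handler = ipinfo.getHandler("MY_TOKEN")  # placeholder token

# The list can be arbitrarily large; getBatchDetails splits it into
# API-sized chunks behind the scenes, as described above.
ip_list = ["10.0.{}.{}".format(a, b) for a in range(8) for b in range(256)]

results = handler.getBatchDetails(ip_list, raise_on_fail=False)

# The return value maps each looked-up IP to its details dict.
print(len(results), "addresses resolved")
```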

ipinfo/exceptions.py

Lines changed: 6 additions & 0 deletions
@@ -7,3 +7,9 @@ class RequestQuotaExceededError(Exception):
     """Error indicating that users monthly request quota has been passed."""
 
     pass
+
+
+class TimeoutExceededError(Exception):
+    """Error indicating that some timeout has been exceeded."""
+
+    pass
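
The new exception matters when `timeout_total` is set on a batch call. A minimal sketch of catching it, assuming `ipinfo.getHandler` and a placeholder token:

```python
import ipinfo
from ipinfo.exceptions import RequestQuotaExceededError, TimeoutExceededError

handler = ipinfo.getHandler("MY_TOKEN")  # placeholder token

try:
    # With raise_on_fail left at its default (True), a blown total time
    # budget surfaces as TimeoutExceededError rather than partial results.
    results = handler.getBatchDetails(["1.1.1.1", "8.8.8.8"], timeout_total=10)
except TimeoutExceededError:
    print("batch lookup exceeded the total time budget")
except RequestQuotaExceededError:
    print("monthly request quota exceeded")
```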

ipinfo/handler.py

Lines changed: 143 additions & 102 deletions
@@ -6,12 +6,23 @@
 import json
 import os
 import sys
+import time
 
 import requests
 
 from .cache.default import DefaultCache
 from .details import Details
-from .exceptions import RequestQuotaExceededError
+from .exceptions import RequestQuotaExceededError, TimeoutExceededError
+from .handler_utils import (
+    API_URL,
+    COUNTRY_FILE_DEFAULT,
+    BATCH_MAX_SIZE,
+    CACHE_MAXSIZE,
+    CACHE_TTL,
+    REQUEST_TIMEOUT_DEFAULT,
+    BATCH_REQ_TIMEOUT_DEFAULT,
+)
+from . import handler_utils
 
 
 class Handler:
@@ -20,12 +31,6 @@ class Handler:
     Instantiates and maintains access to cache.
     """
 
-    API_URL = "https://ipinfo.io"
-    CACHE_MAXSIZE = 4096
-    CACHE_TTL = 60 * 60 * 24
-    COUNTRY_FILE_DEFAULT = "countries.json"
-    REQUEST_TIMEOUT_DEFAULT = 2
-
     def __init__(self, access_token=None, **kwargs):
         """
         Initialize the Handler object with country name list and the
3439
self.access_token = access_token
3540

3641
# load countries file
37-
self.countries = self._read_country_names(kwargs.get("countries_file"))
42+
self.countries = handler_utils.read_country_names(
43+
kwargs.get("countries_file")
44+
)
3845

3946
# setup req opts
4047
self.request_options = kwargs.get("request_options", {})
4148
if "timeout" not in self.request_options:
42-
self.request_options["timeout"] = self.REQUEST_TIMEOUT_DEFAULT
49+
self.request_options["timeout"] = REQUEST_TIMEOUT_DEFAULT
4350

4451
# setup cache
4552
if "cache" in kwargs:
4653
self.cache = kwargs["cache"]
4754
else:
4855
cache_options = kwargs.get("cache_options", {})
4956
if "maxsize" not in cache_options:
50-
cache_options["maxsize"] = self.CACHE_MAXSIZE
57+
cache_options["maxsize"] = CACHE_MAXSIZE
5158
if "ttl" not in cache_options:
52-
cache_options["ttl"] = self.CACHE_TTL
59+
cache_options["ttl"] = CACHE_TTL
5360
self.cache = DefaultCache(**cache_options)
5461

55-
def getDetails(self, ip_address=None):
56-
"""Get details for specified IP address as a Details object."""
57-
raw_details = self._requestDetails(ip_address)
58-
self._format_details(raw_details)
59-
return Details(raw_details)
62+
def getDetails(self, ip_address=None, timeout=None):
63+
"""
64+
Get details for specified IP address as a Details object.
65+
66+
If `timeout` is not `None`, it will override the client-level timeout
67+
just for this operation.
68+
"""
69+
# If the supplied IP address uses the objects defined in the built-in
70+
# module ipaddress extract the appropriate string notation before
71+
# formatting the URL.
72+
if isinstance(ip_address, IPv4Address) or isinstance(
73+
ip_address, IPv6Address
74+
):
75+
ip_address = ip_address.exploded
76+
77+
if ip_address in self.cache:
78+
return Details(self.cache[ip_address])
79+
80+
# prepare req http opts
81+
req_opts = {**self.request_options}
82+
if timeout is not None:
83+
req_opts["timeout"] = timeout
84+
85+
# not in cache; do http req
86+
url = API_URL
87+
if ip_address:
88+
url += "/" + ip_address
89+
headers = handler_utils.get_headers(self.access_token)
90+
response = requests.get(url, headers=headers, **req_opts)
91+
if response.status_code == 429:
92+
raise RequestQuotaExceededError()
93+
response.raise_for_status()
94+
details = response.json()
95+
96+
# format & cache
97+
handler_utils.format_details(details, self.countries)
98+
self.cache[ip_address] = details
99+
100+
return Details(details)
101+
102+
def getBatchDetails(
103+
self,
104+
ip_addresses,
105+
batch_size=None,
106+
timeout_per_batch=BATCH_REQ_TIMEOUT_DEFAULT,
107+
timeout_total=None,
108+
raise_on_fail=True,
109+
):
110+
"""
111+
Get details for a batch of IP addresses at once.
112+
113+
There is no specified limit to the number of IPs this function can
114+
accept; it can handle as much as the user can fit in RAM (along with
115+
all of the response data, which is at least a magnitude larger than the
116+
input list).
117+
118+
The input list is broken up into batches to abide by API requirements.
119+
The batch size can be adjusted with `batch_size` but is clipped to
120+
`BATCH_MAX_SIZE`.
121+
Defaults to `BATCH_MAX_SIZE`.
122+
123+
For each batch, `timeout_per_batch` indicates the maximum seconds to
124+
spend waiting for the HTTP request to complete. If any batch fails with
125+
this timeout, the whole operation fails.
126+
Defaults to `BATCH_REQ_TIMEOUT_DEFAULT` seconds.
127+
128+
`timeout_total` is a seconds-denominated hard-timeout for the time
129+
spent in HTTP operations; regardless of whether all batches have
130+
succeeded so far, if `timeout_total` is reached, the whole operation
131+
will fail by raising `TimeoutExceededError`.
132+
Defaults to being turned off.
133+
134+
`raise_on_fail`, if turned off, will return any result retrieved so far
135+
rather than raise an exception when errors occur, including timeout and
136+
quota errors.
137+
Defaults to on.
138+
"""
139+
if batch_size == None:
140+
batch_size = BATCH_MAX_SIZE
60141

61-
def getBatchDetails(self, ip_addresses):
62-
"""Get details for a batch of IP addresses at once."""
63142
result = {}
64143

65-
# Pre-populate with anything we've got in the cache, and keep around
144+
# pre-populate with anything we've got in the cache, and keep around
66145
# the IPs not in the cache.
67146
lookup_addresses = []
68147
for ip_address in ip_addresses:
69-
# If the supplied IP address uses the objects defined in the
148+
# if the supplied IP address uses the objects defined in the
70149
# built-in module ipaddress extract the appropriate string notation
71150
# before formatting the URL.
72151
if isinstance(ip_address, IPv4Address) or isinstance(
@@ -79,95 +158,57 @@ def getBatchDetails(self, ip_addresses):
             else:
                 lookup_addresses.append(ip_address)
 
-        # Do the lookup
-        url = self.API_URL + "/batch"
-        headers = self._get_headers()
-        headers["content-type"] = "application/json"
-        response = requests.post(
-            url, json=lookup_addresses, headers=headers, **self.request_options
-        )
-        if response.status_code == 429:
-            raise RequestQuotaExceededError()
-        response.raise_for_status()
+        # all in cache - return early.
+        if len(lookup_addresses) == 0:
+            return result
 
-        # Fill up cache
-        json_response = response.json()
-        for ip_address, details in json_response.items():
-            self.cache[ip_address] = details
+        # do start timer if necessary
+        if timeout_total is not None:
+            start_time = time.time()
 
-        # Merge cached results with new lookup
-        result.update(json_response)
+        # prepare req http options
+        req_opts = {**self.request_options, "timeout": timeout_per_batch}
 
-        # Format every result
-        for detail in result.values():
-            if isinstance(detail, dict):
-                self._format_details(detail)
-
-        return result
+        # loop over batch chunks and do lookup for each.
+        url = API_URL + "/batch"
+        headers = handler_utils.get_headers(self.access_token)
+        headers["content-type"] = "application/json"
+        for i in range(0, len(lookup_addresses), batch_size):
+            # quit if total timeout is reached.
+            if (
+                timeout_total is not None
+                and time.time() - start_time > timeout_total
+            ):
+                return handler_utils.return_or_fail(
+                    raise_on_fail, TimeoutExceededError(), result
+                )
 
-    def _requestDetails(self, ip_address=None):
-        """Get IP address data by sending request to IPinfo API."""
+            chunk = lookup_addresses[i : i + batch_size]
 
-        # If the supplied IP address uses the objects defined in the built-in
-        # module ipaddress extract the appropriate string notation before
-        # formatting the URL.
-        if isinstance(ip_address, IPv4Address) or isinstance(
-            ip_address, IPv6Address
-        ):
-            ip_address = ip_address.exploded
+            # lookup
+            response = requests.post(
+                url, json=chunk, headers=headers, **req_opts
+            )
 
-        if ip_address not in self.cache:
-            url = self.API_URL
-            if ip_address:
-                url += "/" + ip_address
+            # fail on bad status codes
+            try:
+                if response.status_code == 429:
+                    raise RequestQuotaExceededError()
+                response.raise_for_status()
+            except Exception as e:
+                return handler_utils.return_or_fail(raise_on_fail, e, result)
 
-            response = requests.get(
-                url, headers=self._get_headers(), **self.request_options
-            )
-            if response.status_code == 429:
-                raise RequestQuotaExceededError()
-            response.raise_for_status()
-            self.cache[ip_address] = response.json()
-
-        return self.cache[ip_address]
-
-    def _get_headers(self):
-        """Built headers for request to IPinfo API."""
-        headers = {
-            "user-agent": "IPinfoClient/Python{version}/4.0.0".format(
-                version=sys.version_info[0]
-            ),
-            "accept": "application/json",
-        }
-
-        if self.access_token:
-            headers["authorization"] = "Bearer {}".format(self.access_token)
-
-        return headers
-
-    def _format_details(self, details):
-        details["country_name"] = self.countries.get(details.get("country"))
-        details["latitude"], details["longitude"] = self._read_coords(
-            details.get("loc")
-        )
+            # fill cache
+            json_response = response.json()
+            for ip_address, details in json_response.items():
+                self.cache[ip_address] = details
 
-    def _read_coords(self, location):
-        lat, lon = None, None
-        coords = tuple(location.split(",")) if location else ""
-        if len(coords) == 2 and coords[0] and coords[1]:
-            lat, lon = coords[0], coords[1]
-        return lat, lon
+            # merge cached results with new lookup
+            result.update(json_response)
 
-    def _read_country_names(self, countries_file=None):
-        """
-        Read list of countries from specified country file or
-        default file.
-        """
-        if not countries_file:
-            countries_file = os.path.join(
-                os.path.dirname(__file__), self.COUNTRY_FILE_DEFAULT
-            )
-        with open(countries_file) as f:
-            countries_json = f.read()
+        # format all
+        for detail in result.values():
+            if isinstance(detail, dict):
+                handler_utils.format_details(detail, self.countries)
 
-        return json.loads(countries_json)
+        return result
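
For readers skimming the diff above, the heart of the new `getBatchDetails` is a plain chunking loop with an optional total time budget. The sketch below is not the SDK's code: `lookup_batch`/`fake_lookup` are hypothetical stand-ins for the POST to the batch endpoint, the `BATCH_MAX_SIZE` value is an assumption, and a plain `TimeoutError` stands in for the SDK's `TimeoutExceededError`.

```python
import time

# Assumed value; the real constant lives in ipinfo.handler_utils.
BATCH_MAX_SIZE = 1000


def batched_lookup(addresses, lookup_batch, batch_size=BATCH_MAX_SIZE, timeout_total=None):
    """Chunk `addresses`, merge per-chunk results, and honor a total time budget."""
    batch_size = min(batch_size, BATCH_MAX_SIZE)
    result = {}
    start = time.time()
    for i in range(0, len(addresses), batch_size):
        # Hard stop once the overall budget is spent.
        if timeout_total is not None and time.time() - start > timeout_total:
            raise TimeoutError("total time budget exceeded")
        chunk = addresses[i : i + batch_size]
        result.update(lookup_batch(chunk))  # hypothetical per-chunk lookup call
    return result


# Usage with a fake lookup that just echoes each address back:
def fake_lookup(chunk):
    return {ip: {"ip": ip} for ip in chunk}


print(len(batched_lookup([str(i) for i in range(2500)], fake_lookup)))  # -> 2500
```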
