Skip to content

Commit 87dffc8

Browse files
Merge pull request #380 from hydrosquall/feature/Issue-376-update-enigma-connector
Feature/Issue 376 Update EnigmaReader Data Connector
2 parents edc8a8d + 95af001 commit 87dffc8

File tree

4 files changed

+92
-73
lines changed

4 files changed

+92
-73
lines changed

docs/source/remote_data.rst

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,15 +204,20 @@ Available expiry dates can be accessed from the ``expiry_dates`` property.
204204
Enigma
205205
======
206206

207-
Access datasets from `Enigma <https://app.enigma.io>`__,
208-
the world's largest repository of structured public data.
207+
Access datasets from `Enigma <https://public.enigma.com>`__,
208+
the world's largest repository of structured public data. Note that the Enigma
209+
URL has changed from `app.enigma.io <https://app.enigma.io>`__ as of release
210+
``0.6.0``, as the old API has been deprecated.
211+
212+
Datasets are uniquely identified by the ``uuid4`` at the end of a dataset's web address.
213+
For example, the following code downloads from `USDA Food Recalls 1996 Data <https://public.enigma.com/datasets/292129b0-1275-44c8-a6a3-2a0881f24fe1>`__.
209214

210215
.. ipython:: python
211216
212217
import os
213218
import pandas_datareader as pdr
214219
215-
df = pdr.get_data_enigma('enigma.trade.ams.toxic.2015', os.getenv('ENIGMA_API_KEY'))
220+
df = pdr.get_data_enigma('292129b0-1275-44c8-a6a3-2a0881f24fe1', os.getenv('ENIGMA_API_KEY'))
216221
df.columns
217222
218223
.. _remote_data.quandl:

pandas_datareader/data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def DataReader(name, data_source=None, start=None, end=None,
137137
session=session).read()
138138

139139
elif data_source == "enigma":
140-
return EnigmaReader(datapath=name, api_key=access_key).read()
140+
return EnigmaReader(dataset_id=name, api_key=access_key).read()
141141

142142
elif data_source == "fred":
143143
return FredReader(symbols=name, start=start, end=end,

pandas_datareader/enigma.py

Lines changed: 67 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,44 @@
1-
import zlib
21
import os
32
import time
4-
from pandas.compat import StringIO
53

4+
from pandas.compat import StringIO
65
import pandas.compat as compat
76
import pandas as pd
8-
import requests
97

108
from pandas_datareader.base import _BaseReader
119

1210

1311
class EnigmaReader(_BaseReader):
1412
"""
15-
Collects Enigma data located at the specified datapath and
16-
returns a pandas DataFrame.
13+
Collects current snapshot of Enigma data located at the specified
14+
dataset ID and returns a pandas DataFrame.
15+
16+
# Example
17+
Download current snapshot for the following Florida Inspections Dataset
18+
https://public.enigma.com/datasets/bedaf052-5fcd-4758-8d27-048ce8746c6a
1719
1820
Usage (high-level):
1921
```
2022
import pandas_datareader as pdr
21-
df = pdr.get_data_enigma('enigma.inspections.restaurants.fl')
23+
df = pdr.get_data_enigma('bedaf052-5fcd-4758-8d27-048ce8746c6a')
2224
2325
# in the event that ENIGMA_API_KEY does not exist in your env,
2426
# it can be supplied as the second arg:
25-
df = prd.get_data_enigma('enigma.inspections.restaurants.fl',
26-
... 'ARIAMFHKJMISF38UT')
27+
df = pdr.get_data_enigma('bedaf052-5fcd-4758-8d27-048ce8746c6a',
28+
... 'INSERT_API_KEY')
2729
```
2830
2931
Usage:
3032
```
31-
df = EnigmaReader(datapath='enigma.inspections.restaurants.fl',
32-
... api_key='ARIAMFHKJMISF38UT').read()
33+
df = EnigmaReader(dataset_id='bedaf052-5fcd-4758-8d27-048ce8746c6a',
34+
... api_key='INSERT_API_KEY').read()
3335
```
3436
"""
35-
3637
def __init__(self,
37-
datapath=None,
38+
dataset_id=None,
3839
api_key=None,
3940
retry_count=5,
40-
pause=0.250,
41+
pause=.75,
4142
session=None):
4243

4344
super(EnigmaReader, self).__init__(symbols=[],
@@ -49,64 +50,69 @@ def __init__(self,
4950
raise ValueError("Please provide an Enigma API key or set "
5051
"the ENIGMA_API_KEY environment variable\n"
5152
"If you do not have an API key, you can get "
52-
"one here: https://app.enigma.io/signup")
53+
"one here: http://public.enigma.com/signup")
5354
else:
5455
self._api_key = api_key
5556

56-
self._datapath = datapath
57-
if not isinstance(self._datapath, compat.string_types):
57+
self._dataset_id = dataset_id
58+
if not isinstance(self._dataset_id, compat.string_types):
5859
raise ValueError(
59-
"The Enigma datapath must be a string (ex: "
60-
"'enigma.inspections.restaurants.fl')")
61-
62-
@property
63-
def url(self):
64-
return 'https://api.enigma.io/v2/export/{}/{}'.format(self._api_key,
65-
self._datapath)
66-
67-
@property
68-
def export_key(self):
69-
return 'export_url'
60+
"The Enigma dataset_id must be a string (ex: "
61+
"'bedaf052-5fcd-4758-8d27-048ce8746c6a')")
62+
63+
headers = {
64+
'Authorization': 'Bearer {0}'.format(self._api_key),
65+
'User-Agent': 'pandas-datareader',
66+
}
67+
self.session.headers.update(headers)
68+
self._base_url = "https://public.enigma.com/api"
69+
self._retry_count = retry_count
70+
self._retry_delay = pause
7071

71-
@property
72-
def _head_key(self):
73-
return 'head_url'
74-
75-
def _request(self, url):
76-
self.session.headers.update({'User-Agent': 'pandas-datareader'})
77-
resp = self.session.get(url)
78-
resp.raise_for_status()
79-
return resp
80-
81-
def _decompress_export(self, compressed_export_data):
82-
return zlib.decompress(compressed_export_data, 16 + zlib.MAX_WBITS)
72+
def read(self):
73+
try:
74+
return self._read()
75+
finally:
76+
self.close()
8377

84-
def extract_export_url(self, delay=10, max_attempts=10):
85-
"""
86-
Performs an HTTP HEAD request on 'head_url' until it returns a `200`.
87-
This allows the Enigma API time to export the requested data.
88-
"""
89-
resp = self._request(self.url)
78+
def _read(self):
79+
snapshot_id = self.get_current_snapshot_id(self._dataset_id)
80+
exported_data = self.get_snapshot_export(snapshot_id) # TODO: Retry?
81+
decoded_data = exported_data.decode("utf-8")
82+
return pd.read_csv(StringIO(decoded_data))
83+
84+
def _get(self, url):
85+
"""HTTP GET Request with Retry Logic"""
86+
url = "{0}/{1}".format(self._base_url, url)
9087
attempts = 0
9188
while True:
9289
try:
93-
requests.head(resp.json()[self._head_key]).raise_for_status()
90+
response = self.session.get(url)
91+
response.raise_for_status()
92+
return response
9493
except Exception as e:
95-
attempts += 1
96-
if attempts > max_attempts:
94+
if attempts < self._retry_count:
95+
attempts += 1
96+
time.sleep(self._retry_delay)
97+
continue
98+
else:
9799
raise e
98-
time.sleep(delay)
99-
continue
100-
return resp.json()[self.export_key]
101100

102-
def read(self):
103-
try:
104-
return self._read()
105-
finally:
106-
self.close()
101+
def get_current_snapshot_id(self, dataset_id):
102+
"""Get ID of the most current snapshot of a dataset"""
103+
dataset_metadata = self.get_dataset_metadata(dataset_id)
104+
return dataset_metadata['current_snapshot']['id']
107105

108-
def _read(self):
109-
export_gzipped_req = self._request(self.extract_export_url())
110-
decompressed_data = self._decompress_export(
111-
export_gzipped_req.content).decode("utf-8")
112-
return pd.read_csv(StringIO(decompressed_data))
106+
def get_dataset_metadata(self, dataset_id):
107+
"""Get the Dataset Model of this EnigmaReader's dataset
108+
https://docs.public.enigma.com/resources/dataset/index.html
109+
"""
110+
url = "datasets/{0}?row_limit=0".format(dataset_id)
111+
response = self._get(url)
112+
return response.json()
113+
114+
def get_snapshot_export(self, snapshot_id):
115+
"""Return raw CSV of a dataset"""
116+
url = "export/{0}".format(snapshot_id)
117+
response = self._get(url)
118+
return response.content

pandas_datareader/tests/test_enigma.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,32 +11,40 @@
1111
@pytest.mark.skipif(TEST_API_KEY is None, reason="no enigma_api_key")
1212
class TestEnigma(object):
1313

14+
@property
15+
def dataset_id(self):
16+
"""
17+
USDA Food Recall Archive - 1996
18+
Selected for being a relatively small dataset.
19+
https://public.enigma.com/datasets/292129b0-1275-44c8-a6a3-2a0881f24fe1
20+
"""
21+
return "292129b0-1275-44c8-a6a3-2a0881f24fe1"
22+
1423
@classmethod
1524
def setup_class(cls):
1625
pytest.importorskip("lxml")
1726

1827
def test_enigma_datareader(self):
1928
try:
20-
df = web.DataReader('enigma.inspections.restaurants.fl',
29+
df = web.DataReader(self.dataset_id,
2130
'enigma', access_key=TEST_API_KEY)
22-
assert 'serialid' in df.columns
31+
assert 'case_number' in df.columns
2332
except HTTPError as e:
2433
pytest.skip(e)
2534

2635
def test_enigma_get_data_enigma(self):
2736
try:
28-
df = pdr.get_data_enigma(
29-
'enigma.inspections.restaurants.fl', TEST_API_KEY)
30-
assert 'serialid' in df.columns
37+
df = pdr.get_data_enigma(self.dataset_id, TEST_API_KEY)
38+
assert 'case_number' in df.columns
3139
except HTTPError as e:
3240
pytest.skip(e)
3341

3442
def test_bad_key(self):
3543
with pytest.raises(HTTPError):
36-
web.DataReader('enigma.inspections.restaurants.fl',
44+
web.DataReader(self.dataset_id,
3745
'enigma', access_key=TEST_API_KEY + 'xxx')
3846

39-
def test_bad_url(self):
47+
def test_bad_dataset_id(self):
4048
with pytest.raises(HTTPError):
41-
web.DataReader('enigma.inspections.restaurants.fllzzy',
49+
web.DataReader('zzzzzzzz-zzzz-zzzz-zzzz-zzzzzzzzzzz',
4250
'enigma', access_key=TEST_API_KEY)

0 commit comments

Comments
 (0)