1
- import zlib
2
1
import os
3
2
import time
4
- from pandas .compat import StringIO
5
3
4
+ from pandas .compat import StringIO
6
5
import pandas .compat as compat
7
6
import pandas as pd
8
- import requests
9
7
10
8
from pandas_datareader .base import _BaseReader
11
9
12
10
13
11
class EnigmaReader (_BaseReader ):
14
12
"""
15
- Collects Enigma data located at the specified datapath and
16
- returns a pandas DataFrame.
13
+ Collects current snapshot of Enigma data located at the specified
14
+ dataset ID and returns a pandas DataFrame.
15
+
16
+ # Example
17
+ Download current snapshot for the following Florida Inspections Dataset
18
+ https://public.enigma.com/datasets/bedaf052-5fcd-4758-8d27-048ce8746c6a
17
19
18
20
Usage (high-level):
19
21
```
20
22
import pandas_datareader as pdr
21
- df = pdr.get_data_enigma('enigma.inspections.restaurants.fl ')
23
+ df = pdr.get_data_enigma('bedaf052-5fcd-4758-8d27-048ce8746c6a ')
22
24
23
25
# in the event that ENIGMA_API_KEY does not exist in your env,
24
26
# it can be supplied as the second arg:
25
- df = prd.get_data_enigma('enigma.inspections.restaurants.fl ',
26
- ... 'ARIAMFHKJMISF38UT ')
27
+ df = pdr.get_data_enigma('bedaf052-5fcd-4758-8d27-048ce8746c6a ',
28
+ ... 'INSERT_API_KEY ')
27
29
```
28
30
29
31
Usage:
30
32
```
31
- df = EnigmaReader(datapath='enigma.inspections.restaurants.fl ',
32
- ... api_key='ARIAMFHKJMISF38UT ').read()
33
+ df = EnigmaReader(dataset_id='bedaf052-5fcd-4758-8d27-048ce8746c6a ',
34
+ ... api_key='INSERT_API_KEY ').read()
33
35
```
34
36
"""
35
-
36
37
def __init__ (self ,
37
- datapath = None ,
38
+ dataset_id = None ,
38
39
api_key = None ,
39
40
retry_count = 5 ,
40
- pause = 0.250 ,
41
+ pause = .75 ,
41
42
session = None ):
42
43
43
44
super (EnigmaReader , self ).__init__ (symbols = [],
@@ -49,64 +50,69 @@ def __init__(self,
49
50
raise ValueError ("Please provide an Enigma API key or set "
50
51
"the ENIGMA_API_KEY environment variable\n "
51
52
"If you do not have an API key, you can get "
52
- "one here: https ://app .enigma.io /signup" )
53
+ "one here: http ://public .enigma.com /signup" )
53
54
else :
54
55
self ._api_key = api_key
55
56
56
- self ._datapath = datapath
57
- if not isinstance (self ._datapath , compat .string_types ):
57
+ self ._dataset_id = dataset_id
58
+ if not isinstance (self ._dataset_id , compat .string_types ):
58
59
raise ValueError (
59
- "The Enigma datapath must be a string (ex: "
60
- "'enigma.inspections.restaurants.fl ')" )
61
-
62
- @ property
63
- def url (self ):
64
- return 'https://api.enigma.io/v2/export/{}/{}' . format ( self . _api_key ,
65
- self . _datapath )
66
-
67
- @ property
68
- def export_key ( self ):
69
- return 'export_url'
60
+ "The Enigma dataset_id must be a string (ex: "
61
+ "'bedaf052-5fcd-4758-8d27-048ce8746c6a ')" )
62
+
63
+ headers = {
64
+ 'Authorization' : 'Bearer {0}' . format (self . _api_key ),
65
+ 'User-Agent' : 'pandas-datareader' ,
66
+ }
67
+ self . session . headers . update ( headers )
68
+ self . _base_url = "https://public.enigma.com/api"
69
+ self . _retry_count = retry_count
70
+ self . _retry_delay = pause
70
71
71
- @property
72
- def _head_key (self ):
73
- return 'head_url'
74
-
75
- def _request (self , url ):
76
- self .session .headers .update ({'User-Agent' : 'pandas-datareader' })
77
- resp = self .session .get (url )
78
- resp .raise_for_status ()
79
- return resp
80
-
81
- def _decompress_export (self , compressed_export_data ):
82
- return zlib .decompress (compressed_export_data , 16 + zlib .MAX_WBITS )
72
def read(self):
    """Return the result of ``self._read()``.

    ``self.close()`` is called whether or not ``_read`` succeeds; any
    exception raised by ``_read`` propagates to the caller afterwards.
    """
    try:
        frame = self._read()
    finally:
        # Runs on both the success and the failure path.
        self.close()
    return frame
83
77
84
- def extract_export_url (self , delay = 10 , max_attempts = 10 ):
85
- """
86
- Performs an HTTP HEAD request on 'head_url' until it returns a `200`.
87
- This allows the Enigma API time to export the requested data.
88
- """
89
- resp = self ._request (self .url )
78
+ def _read (self ):
79
+ snapshot_id = self .get_current_snapshot_id (self ._dataset_id )
80
+ exported_data = self .get_snapshot_export (snapshot_id ) # TODO: Retry?
81
+ decoded_data = exported_data .decode ("utf-8" )
82
+ return pd .read_csv (StringIO (decoded_data ))
83
+
84
+ def _get (self , url ):
85
+ """HTTP GET Request with Retry Logic"""
86
+ url = "{0}/{1}" .format (self ._base_url , url )
90
87
attempts = 0
91
88
while True :
92
89
try :
93
- requests .head (resp .json ()[self ._head_key ]).raise_for_status ()
90
+ response = self .session .get (url )
91
+ response .raise_for_status ()
92
+ return response
94
93
except Exception as e :
95
- attempts += 1
96
- if attempts > max_attempts :
94
+ if attempts < self ._retry_count :
95
+ attempts += 1
96
+ time .sleep (self ._retry_delay )
97
+ continue
98
+ else :
97
99
raise e
98
- time .sleep (delay )
99
- continue
100
- return resp .json ()[self .export_key ]
101
100
102
- def read (self ):
103
- try :
104
- return self ._read ()
105
- finally :
106
- self .close ()
101
def get_current_snapshot_id(self, dataset_id):
    """Return the id of the current snapshot of ``dataset_id``.

    The value is read from ``['current_snapshot']['id']`` of the metadata
    returned by ``get_dataset_metadata``.
    """
    current_snapshot = self.get_dataset_metadata(dataset_id)['current_snapshot']
    return current_snapshot['id']
107
105
108
- def _read (self ):
109
- export_gzipped_req = self ._request (self .extract_export_url ())
110
- decompressed_data = self ._decompress_export (
111
- export_gzipped_req .content ).decode ("utf-8" )
112
- return pd .read_csv (StringIO (decompressed_data ))
106
def get_dataset_metadata(self, dataset_id):
    """Fetch the Dataset model for ``dataset_id`` and return it as parsed JSON.

    See https://docs.public.enigma.com/resources/dataset/index.html

    The request is sent with ``row_limit=0`` (presumably so that no data
    rows are included in the response; confirm against the API docs).
    """
    metadata_path = "datasets/{0}?row_limit=0".format(dataset_id)
    return self._get(metadata_path).json()
113
+
114
def get_snapshot_export(self, snapshot_id):
    """Return the raw export body (``response.content``) for ``snapshot_id``.

    The export is requested from the ``export/<snapshot_id>`` path via
    ``self._get``.
    """
    export_path = "export/{0}".format(snapshot_id)
    return self._get(export_path).content
0 commit comments