1
- import zlib
2
1
import os
3
- import time
4
2
from pandas .compat import StringIO
5
3
6
4
import pandas .compat as compat
7
5
import pandas as pd
8
- import requests
9
6
10
7
from pandas_datareader .base import _BaseReader
11
8
12
9
13
10
class EnigmaReader (_BaseReader ):
14
11
"""
15
- Collects Enigma data located at the specified datapath and
16
- returns a pandas DataFrame.
12
+ Collects current snapshot of Enigma data located at the specified
13
+ dataset ID and returns a pandas DataFrame.
14
+
15
+ # Example
16
+ Download current snapshot for the following Florida Inspections Dataset
17
+ https://public.enigma.com/datasets/bedaf052-5fcd-4758-8d27-048ce8746c6a
17
18
18
19
Usage (high-level):
19
20
```
20
21
import pandas_datareader as pdr
21
- df = pdr.get_data_enigma('enigma.inspections.restaurants.fl ')
22
+ df = pdr.get_data_enigma('bedaf052-5fcd-4758-8d27-048ce8746c6a ')
22
23
23
24
# in the event that ENIGMA_API_KEY does not exist in your env,
24
25
# it can be supplied as the second arg:
25
- df = prd.get_data_enigma('enigma.inspections.restaurants.fl ',
26
- ... 'ARIAMFHKJMISF38UT ')
26
+ df = pdr.get_data_enigma('bedaf052-5fcd-4758-8d27-048ce8746c6a ',
27
+ ... 'INSERT_API_KEY ')
27
28
```
28
29
29
30
Usage:
30
31
```
31
- df = EnigmaReader(datapath='enigma.inspections.restaurants.fl ',
32
- ... api_key='ARIAMFHKJMISF38UT ').read()
32
+ df = EnigmaReader(dataset_id='bedaf052-5fcd-4758-8d27-048ce8746c6a ',
33
+ ... api_key='INSERT_API_KEY ').read()
33
34
```
34
35
"""
35
-
36
36
def __init__ (self ,
37
- datapath = None ,
37
+ dataset_id = None ,
38
38
api_key = None ,
39
39
retry_count = 5 ,
40
- pause = 0.250 ,
40
+ pause = 0.5 ,
41
41
session = None ):
42
42
43
43
super (EnigmaReader , self ).__init__ (symbols = [],
@@ -49,55 +49,23 @@ def __init__(self,
49
49
raise ValueError ("Please provide an Enigma API key or set "
50
50
"the ENIGMA_API_KEY environment variable\n "
51
51
"If you do not have an API key, you can get "
52
- "one here: https ://app .enigma.io /signup" )
52
+ "one here: http ://public .enigma.com /signup" )
53
53
else :
54
54
self ._api_key = api_key
55
55
56
- self ._datapath = datapath
57
- if not isinstance (self ._datapath , compat .string_types ):
56
+ self ._dataset_id = dataset_id
57
+ if not isinstance (self ._dataset_id , compat .string_types ):
58
+ # TODO: test if string is valid UUID
58
59
raise ValueError (
59
- "The Enigma datapath must be a string (ex: "
60
- "'enigma.inspections.restaurants.fl')" )
61
-
62
- @property
63
- def url (self ):
64
- return 'https://api.enigma.io/v2/export/{}/{}' .format (self ._api_key ,
65
- self ._datapath )
66
-
67
- @property
68
- def export_key (self ):
69
- return 'export_url'
70
-
71
- @property
72
- def _head_key (self ):
73
- return 'head_url'
60
+ "The Enigma dataset_id must be a UUID4 string (ex: "
61
+ "'bedaf052-5fcd-4758-8d27-048ce8746c6a')" )
74
62
75
- def _request (self , url ):
76
- self .session .headers .update ({'User-Agent' : 'pandas-datareader' })
77
- resp = self .session .get (url )
78
- resp .raise_for_status ()
79
- return resp
80
-
81
- def _decompress_export (self , compressed_export_data ):
82
- return zlib .decompress (compressed_export_data , 16 + zlib .MAX_WBITS )
83
-
84
- def extract_export_url (self , delay = 10 , max_attempts = 10 ):
85
- """
86
- Performs an HTTP HEAD request on 'head_url' until it returns a `200`.
87
- This allows the Enigma API time to export the requested data.
88
- """
89
- resp = self ._request (self .url )
90
- attempts = 0
91
- while True :
92
- try :
93
- requests .head (resp .json ()[self ._head_key ]).raise_for_status ()
94
- except Exception as e :
95
- attempts += 1
96
- if attempts > max_attempts :
97
- raise e
98
- time .sleep (delay )
99
- continue
100
- return resp .json ()[self .export_key ]
63
+ headers = {
64
+ 'Authorization' : 'Bearer {0}' .format (self ._api_key ),
65
+ 'User-Agent' : 'pandas-datareader' ,
66
+ }
67
+ self .session .headers .update (headers )
68
+ self ._base_url = "https://public.enigma.com/api"
101
69
102
70
def read (self ):
103
71
try :
@@ -106,7 +74,33 @@ def read(self):
106
74
self .close ()
107
75
108
76
def _read (self ):
109
- export_gzipped_req = self ._request (self .extract_export_url ())
110
- decompressed_data = self ._decompress_export (
111
- export_gzipped_req .content ).decode ("utf-8" )
112
- return pd .read_csv (StringIO (decompressed_data ))
77
+ snapshot_id = self .get_current_snapshot_id (self ._dataset_id )
78
+ exported_data = self .get_snapshot_export (snapshot_id ) # TODO: Retry?
79
+ decoded_data = exported_data .decode ("utf-8" )
80
+ return pd .read_csv (StringIO (decoded_data ))
81
+
82
+ def _get (self , url ):
83
+ """HTTP GET Request"""
84
+ url = "{0}/{1}" .format (self ._base_url , url )
85
+ response = self .session .get (url )
86
+ response .raise_for_status ()
87
+ return response
88
+
89
def get_current_snapshot_id(self, dataset_id=None):
    """Get ID of the most current snapshot of a dataset.

    Parameters
    ----------
    dataset_id : str, optional
        Dataset to look up. Defaults to this reader's ``_dataset_id`` so
        the method can be called with or without an explicit ID.

    Returns
    -------
    The ``id`` field of the ``current_snapshot`` entry in the dataset
    metadata.
    """
    if dataset_id is None:
        dataset_id = self._dataset_id
    # get_dataset_metadata requires the dataset ID; always forward it.
    dataset_metadata = self.get_dataset_metadata(dataset_id)
    return dataset_metadata['current_snapshot']['id']
93
+
94
def get_dataset_metadata(self, dataset_id):
    """Fetch the metadata document for ``dataset_id``.

    Requests ``datasets/<dataset_id>`` with ``row_limit=0`` via
    ``self._get`` and returns the decoded JSON body.
    """
    endpoint = "datasets/{0}?row_limit=0".format(dataset_id)
    return self._get(endpoint).json()
101
+
102
def get_snapshot_export(self, snapshot_id):
    """Fetch the export for ``snapshot_id`` and return the raw body.

    Requests ``export/<snapshot_id>`` via ``self._get`` and returns the
    response's ``content`` attribute unchanged (the caller decodes it).
    """
    endpoint = "export/{0}".format(snapshot_id)
    return self._get(endpoint).content
0 commit comments