Skip to content

Commit fc92a7f

Browse files
Merge pull request #28 from ibm-watson-data-lab/bluemix_cos_support_python
Bluemix Cloud Object Storage Support [Python]
2 parents 3f7b519 + 2b8cd15 commit fc92a7f

File tree

2 files changed

+114
-18
lines changed

2 files changed

+114
-18
lines changed

python/README.md

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ within a DSX Jupyter notebook, you can obtain your account credentials in the fo
3232
If your Object Storage was created with a Softlayer account, each part of the credentials will
3333
be found as text that you can copy and paste into the example code below.
3434

35-
### CloudObjectStorage / Data Science Experience
35+
### Softlayer IBM Cloud Object Storage (COS)
3636
```python
3737
import ibmos2spark
3838

@@ -50,7 +50,47 @@ object_name = 'file1'
5050
data = sc.textFile(cos.url(object_name, bucket_name))
5151
```
5252

53-
### Bluemix / Data Science Experience
53+
### Bluemix IBM Cloud Object Storage (COS)
54+
The class CloudObjectStorage allows you to connect to an IBM Bluemix COS. You can connect to a Bluemix COS using API keys
55+
as follows:
56+
57+
```python
58+
import ibmos2spark
59+
60+
credentials = {
61+
'endpoint': 'XXX',
62+
'api_key': 'XXX',
63+
'service_id': 'XXX'
64+
}
65+
66+
configuration_name = 'os_bluemix_cos_config'
67+
cos = ibmos2spark.CloudObjectStorage(sc, credentials, configuration_name, 'bluemix_cos')
68+
69+
bucket_name = 'bucket_name'
70+
object_name = 'file_name'
71+
data = sc.textFile(cos.url(object_name, bucket_name))
72+
```
73+
74+
Alternatively, you can connect to an IBM Bluemix COS using an IAM token. Example:
75+
```python
76+
import ibmos2spark
77+
78+
credentials = {
79+
'endpoint': 'XXX',
80+
'iam_token': 'eyJraWQXXXX .... X',
81+
'service_id': 'XXX'
82+
}
83+
84+
configuration_name = 'os_bluemix_cos_config'
85+
cos = ibmos2spark.CloudObjectStorage(sc, credentials, configuration_name, 'bluemix_cos', 'iam_token')
86+
87+
bucket_name = 'bucket_name'
88+
object_name = 'file_name'
89+
data = sc.textFile(cos.url(object_name, bucket_name))
90+
```
91+
92+
93+
### Bluemix Swift Object Storage / Data Science Experience
5494

5595
```python
5696
import ibmos2spark
@@ -75,7 +115,7 @@ data = sc.textFile(bmos.url(container_name, object_name))
75115
```
76116

77117

78-
### Softlayer
118+
### Softlayer Swift Object Storage
79119

80120

81121
```python

python/ibmos2spark/osconfig.py

Lines changed: 71 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -170,37 +170,50 @@ def url(self, container_name, object_name):
170170

171171
class CloudObjectStorage(object):
172172

173-
def __init__(self, sparkcontext, credentials, configuration_name='', bucket_name=''):
173+
def __init__(self, sparkcontext, credentials, configuration_name='', cos_type='softlayer_cos', auth_method='api_key', bucket_name=''):
174174

175175
'''
176+
This class allows you to connect to an IBM cloud object storage (COS) instance. It also supports connecting to an IBM COS instance
177+
that is being hosted on bluemix.
178+
176179
sparkcontext: a SparkContext object.
177180
178-
credentials: a dictionary with the following required keys:
179-
* endpoint
180-
* access_key
181-
* secret_key
181+
credentials: a dictionary with the required keys to connect to an IBM COS. The required keys differ according
182+
to the type of COS.
183+
- for COS type "softlayer_cos" the following keys are required:
184+
* endpoint
185+
* access_key
186+
* secret_key
187+
- for COS type "bluemix_cos", here are the required/optional keys:
188+
* endpoint [required]
189+
* service_id [required]
190+
* api_key OR iam_token, depending on the selected authorization method (auth_method) [required]
191+
* iam_service_endpoint [optional] (default: https://iam.ng.bluemix.net/oidc/token)
192+
* v2_signer_type [optional]
182193
183194
configuration_name [optional]: string that identifies this configuration. You can
184195
use any string you like. This allows you to create
185196
multiple configurations to different Object Storage accounts.
186197
if a configuration name is not passed the default one will be used "service".
187198
199+
cos_type [optional]: string that identifies the type of COS to connect to. The supported types of COS
200+
are "softlayer_cos" and "bluemix_cos". "softlayer_cos" will be chosen as default if no cos_type is passed.
201+
202+
auth_method [optional]: string that identifies the type of authorization to use when connecting to an IBM COS. This parameter
203+
is not required for softlayer_cos; it is only needed for bluemix_cos. Two options can be chosen for this parameter:
204+
"api_key" or "iam_token". "api_key" will be chosen as default if the value is not set.
205+
188206
bucket_name [optional]: string that identifies the default
189207
bucket name you want to access files from in the COS service instance.
190208
If this value is not specified, you need to pass it when
191209
you use the url function.
192210
193211
'''
194-
self.bucket_name = bucket_name
195-
self.conf_name = configuration_name
196-
197212
# check if all required values are available
198-
credential_key_list = ["endpoint", "access_key", "secret_key"]
213+
self._validate_input(credentials, cos_type, auth_method)
199214

200-
for i in range(len(credential_key_list)):
201-
key = credential_key_list[i]
202-
if (not key in credentials):
203-
raise ValueError("Invalid input: credentials.{} is required!".format(key))
215+
self.bucket_name = bucket_name
216+
self.conf_name = configuration_name
204217

205218
# setup config
206219
prefix = "fs.cos"
@@ -212,8 +225,51 @@ def __init__(self, sparkcontext, credentials, configuration_name='', bucket_name
212225

213226
hconf = sparkcontext._jsc.hadoopConfiguration()
214227
hconf.set(prefix + ".endpoint", credentials['endpoint'])
215-
hconf.set(prefix + ".access.key", credentials['access_key'])
216-
hconf.set(prefix + ".secret.key", credentials['secret_key'])
228+
229+
# softlayer cos case
230+
if (cos_type == "softlayer_cos"):
231+
hconf.set(prefix + ".access.key", credentials['access_key'])
232+
hconf.set(prefix + ".secret.key", credentials['secret_key'])
233+
234+
# bluemix cos case
235+
elif (cos_type == "bluemix_cos"):
236+
hconf.set(prefix + ".iam.service.id", credentials['service_id'])
237+
if (auth_method == "api_key"):
238+
hconf.set(prefix + ".iam.api.key", credentials['api_key'])
239+
elif (auth_method == "iam_token"):
240+
hconf.set(prefix + ".iam.token", credentials['iam_token'])
241+
242+
if (credentials.get('iam_service_endpoint')):
243+
hconf.set(prefix + ".iam.endpoint", credentials['iam_service_endpoint'])
244+
245+
if (credentials.get('v2_signer_type')):
246+
hconf.set(prefix + ".v2.signer.type", credentials['v2_signer_type'])
247+
248+
def _validate_input(self, credentials, cos_type, auth_method):
    '''
    Check that credentials contains every key required for the selected
    COS type and authorization method.

    credentials: a dictionary (or any container supporting "in") holding
                 the connection credentials.

    cos_type: "softlayer_cos" or "bluemix_cos".

    auth_method: "api_key" or "iam_token". Only inspected when cos_type is
                 "bluemix_cos"; it is ignored for "softlayer_cos".

    Returns nothing. Raises ValueError if cos_type is not supported, if
    auth_method is not supported for "bluemix_cos", or if a required key is
    missing from credentials (only the first missing key is reported).
    '''
    # Select the required keys. cos_type / auth_method are validated before
    # any credential key is looked at, so an unsupported type is reported
    # ahead of a missing key.
    if cos_type == "softlayer_cos":
        required_keys = ("endpoint", "access_key", "secret_key")
    elif cos_type == "bluemix_cos":
        if auth_method == "api_key":
            required_keys = ("endpoint", "api_key", "service_id")
        elif auth_method == "iam_token":
            required_keys = ("endpoint", "iam_token", "service_id")
        else:
            raise ValueError("Invalid input: auth_method. auth_method is optional but if set, it should have one of the following values: api_key, iam_token")
    else:
        raise ValueError("Invalid input: cos_type. cos_type is optional but if set, it should have one of the following values: softlayer_cos, bluemix_cos")

    # check keys, in declaration order
    for key in required_keys:
        if key not in credentials:
            raise ValueError("Invalid input: credentials. {} is required!".format(key))
217273

218274
def url(self, object_name, bucket_name=''):
219275
bucket_name_var = ''

0 commit comments

Comments
 (0)