
Commit 7d174ca

Merge pull request #22 from ibm-watson-data-lab/cos_scheme_support_python
COS Scheme Support + Multiple Config Support [Python]
2 parents: f857cca + c34e1c2

2 files changed: +27, -29 lines


python/README.md

Lines changed: 3 additions & 2 deletions

````diff
@@ -14,7 +14,7 @@ and adding it to your local Apache Spark kernel's classpath.
 
 ## Installation
 
-This library is now installed by default on IBM Apache Spark.
+This library is now installed by default on IBM Apache Spark.
 
 ```
 pip install --user --upgrade ibmos2spark
@@ -42,7 +42,8 @@ credentials = {
   'secret_key': ''
 }
 
-cos = ibmos2spark.CloudObjectStorage(sc, credentials) #sc is the SparkContext instance
+configuration_name = 'cos_config_string' #you can give any string you like
+cos = ibmos2spark.CloudObjectStorage(sc, credentials, configuration_name) #sc is the SparkContext instance.
 
 bucket_name = 'some_bucket_name'
 object_name = 'file1'
````
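
The README snippet shows a single configuration, but the new `configuration_name` parameter exists precisely so that several configurations can coexist on one SparkContext. A minimal sketch of that use case, where the credential values and the names `creds_a`/`cos_a` are placeholders and `sc` is an existing SparkContext from the notebook environment:

```python
import ibmos2spark

# Hypothetical credentials for two separate COS instances (placeholder values).
creds_a = {'endpoint': '<endpoint-a>', 'access_key': '<key-a>', 'secret_key': '<secret-a>'}
creds_b = {'endpoint': '<endpoint-b>', 'access_key': '<key-b>', 'secret_key': '<secret-b>'}

# Distinct configuration names keep the two sets of Hadoop settings from
# overwriting each other on the same SparkContext.
cos_a = ibmos2spark.CloudObjectStorage(sc, creds_a, 'cos_a')
cos_b = ibmos2spark.CloudObjectStorage(sc, creds_b, 'cos_b')

# Each url() embeds its own configuration name, so Spark resolves each
# read against the matching credentials.
rdd_a = sc.textFile(cos_a.url('file1', 'bucket_in_a'))  # cos://bucket_in_a.cos_a/file1
rdd_b = sc.textFile(cos_b.url('file2', 'bucket_in_b'))  # cos://bucket_in_b.cos_b/file2
```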

python/ibmos2spark/osconfig.py

Lines changed: 24 additions & 27 deletions

```diff
@@ -19,6 +19,8 @@
 
 import warnings
 
+DEFAULT_SERVICE_NAME = "service"
+
 def swifturl2d(name, container_name, object_name):
     return 'swift2d://{}.{}/{}'.format(container_name, name, object_name)
 
@@ -130,12 +132,6 @@ def __init__(self, sparkcontext, credentials, name=None, public=False, swift2d_d
         This is not required at the moment, since credentials['name']
         is still supported.
 
-        When using this from an IBM Spark service instance that
-        is configured to connect to particular Bluemix object store
-        instances, the values for these credentials can be obtained
-        by clicking on the 'insert to code' link just below a data
-        source.
-
         '''
 
         if name:
@@ -174,7 +170,7 @@ def url(self, container_name, object_name):
 
 class CloudObjectStorage(object):
 
-    def __init__(self, sparkcontext, credentials, cos_id='', bucket_name=''):
+    def __init__(self, sparkcontext, credentials, configuration_name='', bucket_name=''):
 
         '''
         sparkcontext: a SparkContext object.
@@ -184,28 +180,19 @@ def __init__(self, sparkcontext, credentials, cos_id='', bucket_name=''):
             * access_key
             * secret_key
 
-        When using this on DSX, credentials and bucket_name can be obtained
-        in DSX - Notebooks by clicking on the data sources palette, then
-        choosing the data source you want to access and hitting insert credentials.
-
-        cos_id [optional]: this parameter is the cloud object storage unique id. It is useful
-        to keep in the class instance for further checks after the initialization. However,
-        it is not mandatory for the class instance to work. This value can be retrieved by
-        calling the get_os_id function.
+        configuration_name [optional]: string that identifies this configuration. You can
+        use any string you like. This allows you to create
+        multiple configurations for different Object Storage accounts.
+        If a configuration name is not passed, the default one, "service", will be used.
 
-        bucket_name (projectId in DSX) [optional]: string that identifies the default
+        bucket_name [optional]: string that identifies the default
         bucket name you want to access files from in the COS service instance.
-        In DSX, bucket_name is the same as projectId. One bucket is
-        associated with one project.
         If this value is not specified, you need to pass it when
         you use the url function.
 
-        Warning: creating a new instance of this class would overwrite the
-        existing Spark Hadoop configs if used with the same SparkContext instance.
-
         '''
         self.bucket_name = bucket_name
-        self.cos_id = cos_id
+        self.conf_name = configuration_name
 
         # check if all required values are available
         credential_key_list = ["endpoint", "access_key", "secret_key"]
@@ -216,22 +203,32 @@ def __init__(self, sparkcontext, credentials, cos_id='', bucket_name=''):
             raise ValueError("Invalid input: credentials.{} is required!".format(key))
 
         # setup config
-        prefix = "fs.s3d.service"
+        prefix = "fs.cos"
+
+        if (configuration_name):
+            prefix = "{}.{}".format(prefix, configuration_name)
+        else:
+            prefix = prefix + "." + DEFAULT_SERVICE_NAME
 
         hconf = sparkcontext._jsc.hadoopConfiguration()
         hconf.set(prefix + ".endpoint", credentials['endpoint'])
         hconf.set(prefix + ".access.key", credentials['access_key'])
         hconf.set(prefix + ".secret.key", credentials['secret_key'])
 
-    def get_os_id():
-        return self.cos_id
-
     def url(self, object_name, bucket_name=''):
         bucket_name_var = ''
+        service_name = DEFAULT_SERVICE_NAME
+
+        # determine the bucket to use
         if (bucket_name):
             bucket_name_var = bucket_name
         elif (self.bucket_name):
             bucket_name_var = self.bucket_name
         else:
             raise ValueError("Invalid input: bucket_name is required!")
 
-        return "s3d://{}.service/{}".format(bucket_name_var, object_name)
+        # use service name that we set up hadoop config for
+        if (self.conf_name):
+            service_name = self.conf_name
+
+        return "cos://{}.{}/{}".format(bucket_name_var, service_name, object_name)
```
