@@ -19,6 +19,8 @@
 
 import warnings
 
+DEFAULT_SERVICE_NAME = "service"
+
 def swifturl2d(name, container_name, object_name):
     return 'swift2d://{}.{}/{}'.format(container_name, name, object_name)
 
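For orientation, a quick illustrative call to the swifturl2d helper shown above; the service, container, and object names are placeholders:

    # with the function defined above:
    swifturl2d('myservice', 'mycontainer', 'data.csv')
    # -> 'swift2d://mycontainer.myservice/data.csv'
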
@@ -130,12 +132,6 @@ def __init__(self, sparkcontext, credentials, name=None, public=False, swift2d_d
         This is not required at the moment, since credentials['name']
         is still supported.
 
-        When using this from a IBM Spark service instance that
-        is configured to connect to particular Bluemix object store
-        instances, the values for these credentials can be obtained
-        by clicking on the 'insert to code' link just below a data
-        source.
-
         '''
 
         if name:
@@ -174,7 +170,7 @@ def url(self, container_name, object_name):
 
 class CloudObjectStorage(object):
 
-    def __init__(self, sparkcontext, credentials, cos_id='', bucket_name=''):
+    def __init__(self, sparkcontext, credentials, configuration_name='', bucket_name=''):
 
         '''
         sparkcontext: a SparkContext object.
@@ -184,28 +180,19 @@ def __init__(self, sparkcontext, credentials, cos_id='', bucket_name=''):
             * access_key
             * secret_key
 
-        When using this on DSX credentials and bucket_name can be obtained
-        in DSX - Notebooks by clicking on the datasources palette then
-        choose the datasource you want to access then hit insert credentials.
-
-        cos_id [optional]: this parameter is the cloud object storage unique id. It is useful
-            to keep in the class instance for further checks after the initialization. However,
-            it is not mandatory for the class instance to work. This value can be retrieved by
-            calling the get_os_id function.
+        configuration_name [optional]: string that identifies this configuration. You can
+            use any string you like. This allows you to create
+            multiple configurations for different Object Storage accounts.
+            If a configuration name is not passed, the default name "service" is used.
 
-        bucket_name (projectId in DSX) [optional]: string that identifies the defult
+        bucket_name [optional]: string that identifies the default
             bucket name you want to access files from in the COS service instance.
-            In DSX, bucket_name is the same as projectId. One bucket is
-            associated with one project.
             If this value is not specified, you need to pass it when
             you use the url function.
 
-        Warning: creating a new instance of this class would overwrite the existing
-            spark hadoop configs if set before if used with the same spark context instance.
-
         '''
         self.bucket_name = bucket_name
-        self.cos_id = cos_id
+        self.conf_name = configuration_name
 
         # check if all required values are availble
         credential_key_list = ["endpoint", "access_key", "secret_key"]
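As a rough usage sketch of the reworked constructor: the credential values, configuration name, and bucket name below are placeholders, sc is an existing SparkContext, and the import path is assumed to follow the package layout:

    from ibmos2spark import CloudObjectStorage   # import path assumed

    credentials = {
        'endpoint': 'https://s3.example.test',   # placeholder endpoint
        'access_key': 'MY_ACCESS_KEY',           # placeholder
        'secret_key': 'MY_SECRET_KEY'            # placeholder
    }

    # 'my_cos' becomes part of the Hadoop config prefix, i.e. fs.cos.my_cos.*
    cos = CloudObjectStorage(sc, credentials,
                             configuration_name='my_cos',
                             bucket_name='my-bucket')
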
@@ -216,22 +203,32 @@ def __init__(self, sparkcontext, credentials, cos_id='', bucket_name=''):
                 raise ValueError("Invalid input: credentials.{} is required!".format(key))
 
         # setup config
-        prefix = "fs.s3d.service"
+        prefix = "fs.cos"
+
+        if (configuration_name):
+            prefix = "{}.{}".format(prefix, configuration_name)
+        else:
+            prefix = prefix + "." + DEFAULT_SERVICE_NAME
+
         hconf = sparkcontext._jsc.hadoopConfiguration()
         hconf.set(prefix + ".endpoint", credentials['endpoint'])
         hconf.set(prefix + ".access.key", credentials['access_key'])
         hconf.set(prefix + ".secret.key", credentials['secret_key'])
 
-    def get_os_id():
-        return self.cos_id
-
     def url(self, object_name, bucket_name=''):
         bucket_name_var = ''
+        service_name = DEFAULT_SERVICE_NAME
+
+        # determine the bucket to use
         if (bucket_name):
             bucket_name_var = bucket_name
         elif (self.bucket_name):
             bucket_name_var = self.bucket_name
         else:
             raise ValueError("Invalid input: bucket_name is required!")
 
-        return "s3d://{}.service/{}".format(bucket_name_var, object_name)
+        # use service name that we set up hadoop config for
+        if (self.conf_name):
+            service_name = self.conf_name
+
+        return "cos://{}.{}/{}".format(bucket_name_var, service_name, object_name)