Skip to content

Commit 3475774

Browse files
Merge pull request #29 from ibm-watson-data-lab/bluemix_cos_support_scala
Bluemix Cloud Object Storage Support [Scala]
2 parents fc92a7f + eb33c1a commit 3475774

File tree

2 files changed

+138
-28
lines changed

2 files changed

+138
-28
lines changed

scala/README.md

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,13 +133,10 @@ within a DSX Jupyter notebook, you can obtain your account credentials in the fo
133133
If your Object Storage was created with a Softlayer account, each part of the credentials will
134134
be found as text that you can copy and paste into the example code below.
135135

136-
### IBM Cloud Object Storage / Data Science Experience
136+
### Softlayer IBM Cloud Object Storage (COS)
137137
```scala
138138
import com.ibm.ibmos2spark.CloudObjectStorage
139139

140-
// The credentials HashMap may be created for you with the
141-
// "insert to code" link in your DSX notebook.
142-
143140
var credentials = scala.collection.mutable.HashMap[String, String](
144141
"endPoint"->"https://identity.open.softlayer.com",
145142
"accessKey"->"xx",
@@ -161,8 +158,61 @@ var dfData1 = spark.
161158
load(cos.url(bucketName, objectname))
162159
```
163160

161+
### Bluemix IBM Cloud Object Storage (COS)
162+
The class CloudObjectStorage allows you to connect to an IBM bluemix COS. You can connect to
163+
a bluemix COS using api keys as follows:
164+
```scala
165+
import com.ibm.ibmos2spark.CloudObjectStorage
166+
167+
var credentials = scala.collection.mutable.HashMap[String, String](
168+
"endPoint"->"xxx",
169+
"apiKey"->"xxx",
170+
"serviceId"->"xxx"
171+
)
172+
var bucketName = "myBucket"
173+
var objectname = "mydata.csv"
174+
175+
var configurationName = "cos_config_name" // you can choose any string you want
176+
var cos = new CloudObjectStorage(sc, credentials, configurationName, "bluemix_cos")
177+
var spark = SparkSession.
178+
builder().
179+
getOrCreate()
180+
181+
var dfData1 = spark.
182+
read.format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat").
183+
option("header", "true").
184+
option("inferSchema", "true").
185+
load(cos.url(bucketName, objectname))
186+
```
187+
Alternatively, you can connect to a bluemix COS using an IAM token. Example:
188+
```scala
189+
import com.ibm.ibmos2spark.CloudObjectStorage
190+
191+
// The credentials HashMap may be created for you with the
192+
// "insert to code" link in your DSX notebook.
193+
194+
var credentials = scala.collection.mutable.HashMap[String, String](
195+
"endPoint"->"xxx",
196+
"iamToken"->"xxx",
197+
"serviceId"->"xxx"
198+
)
199+
var bucketName = "myBucket"
200+
var objectname = "mydata.csv"
201+
202+
var configurationName = "cos_config_name" // you can choose any string you want
203+
var cos = new CloudObjectStorage(sc, credentials, configurationName, "bluemix_cos", "iam_token")
204+
var spark = SparkSession.
205+
builder().
206+
getOrCreate()
207+
208+
var dfData1 = spark.
209+
read.format("org.apache.spark.sql.execution.datasources.csv.CSVFileFormat").
210+
option("header", "true").
211+
option("inferSchema", "true").
212+
load(cos.url(bucketName, objectname))
213+
```
164214

165-
### Bluemix / Data Science Experience
215+
### Bluemix Swift Object Storage / Data Science Experience
166216

167217

168218
```scala
@@ -189,7 +239,7 @@ var rdd = sc.textFile(bmos.url(container , objectname))
189239
```
190240

191241

192-
### Softlayer
242+
### Softlayer Swift Object Storage
193243

194244

195245

scala/src/main/scala/Osconfig.scala

Lines changed: 82 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -129,54 +129,114 @@ class bluemix(sc: SparkContext, name: String, creds: HashMap[String, String],
129129
}
130130

131131
/**
132-
* CloudObjectStorage class sets up a s3d connection between an IBM Spark service
133-
* instance and an IBM Cloud Object Storage instance.
132+
* This class allows you to connect to an IBM cloud object storage (COS) instance. It also supports
133+
connecting to a COS instance that is being hosted on bluemix.
134134
135135
* Constructor arguments:
136136
137137
* sparkcontext: a SparkContext object.
138138
139-
* credentials: a dictionary with the following required keys:
140-
*
141-
* endpoint
142-
143-
* accessKey
144-
145-
* secretKey
139+
* credentials: a dictionary with the following required keys to connect to COS.
140+
The required keys differ according to the type of COS.
141+
* - for COS type "softlayer_cos" the following keys are required:
142+
* endPoint [required]
143+
* accessKey [required]
144+
* secretKey [required]
145+
* - for COS type "bluemix_cos", here are the required/optional keys:
146+
* endPoint [required]
147+
* serviceId [required]
148+
* apiKey OR iamToken, depending on the selected authorization method (authMethod) [required]
149+
* iamServiceEndpoint [optional] (default: https://iam.ng.bluemix.net/oidc/token)
150+
* v2SignerType [optional]
146151
147152
* configurationName [optional]: string that identifies this configuration. You can
148153
use any string you like. This allows you to create
149154
multiple configurations to different Object Storage accounts.
150155
if a configuration name is not passed the default one will be used "service".
156+
157+
* cosType [optional]: string that identifies the type of COS to connect to. The supported types of COS
158+
are "softlayer_cos" and "bluemix_cos". "softlayer_cos" will be chosen as default if no cosType is passed.
159+
160+
* authMethod [optional]: string that identifies the type of authorization method to use when connecting to COS. This parameter
161+
is not required for softlayer_cos and is only needed for bluemix_cos. Two options can be chosen for this parameter:
162+
"api_key" or "iam_token". "api_key" will be chosen as default if the value is not set.
151163
*/
152-
class CloudObjectStorage(sc: SparkContext, credentials: HashMap[String, String], configurationName: String = "") {
164+
class CloudObjectStorage(sc: SparkContext, credentials: HashMap[String, String],
165+
configurationName: String = "", cosType: String = "softlayer_cos",
166+
authMethod: String = "api_key") {
153167

154-
// check if all credentials are available
155-
val requiredValues = Array("endPoint", "accessKey", "secretKey")
156-
for ( key <- requiredValues ) {
157-
if (!credentials.contains(key)) {
158-
throw new IllegalArgumentException("Invalid input: missing required input [" + key + "]")
159-
}
160-
}
168+
// check for valid credentials
169+
_validate_credentials(credentials, cosType, authMethod)
161170

162171
// set config
163172
val hadoopConf = sc.hadoopConfiguration
164-
val prefix = "fs.cos." + getConfigurationName()
173+
val prefix = "fs.cos." + _getConfigurationName()
165174

166175
hadoopConf.set(prefix + ".endpoint", credentials("endPoint"))
167-
hadoopConf.set(prefix + ".access.key", credentials("accessKey"))
168-
hadoopConf.set(prefix + ".secret.key", credentials("secretKey"))
169176

170-
private def getConfigurationName() : String = {
177+
if (cosType == "softlayer_cos") {
178+
// softlayer cos case
179+
hadoopConf.set(prefix + ".access.key", credentials("accessKey"))
180+
hadoopConf.set(prefix + ".secret.key", credentials("secretKey"))
181+
} else if (cosType == "bluemix_cos") {
182+
// bluemix COS case
183+
hadoopConf.set(prefix + ".iam.service.id", credentials("serviceId"))
184+
if (authMethod == "api_key") {
185+
hadoopConf.set(prefix + ".iam.api.key", credentials("apiKey"))
186+
} else if (authMethod == "iam_token") {
187+
hadoopConf.set(prefix + ".iam.token", credentials("iamToken"))
188+
}
189+
190+
if (credentials.contains("iamServiceEndpoint")) {
191+
hadoopConf.set(prefix + ".iam.endpoint", credentials("iamServiceEndpoint"))
192+
}
193+
194+
if (credentials.contains("v2SignerType")) {
195+
hadoopConf.set(prefix + ".v2.signer.type", credentials("v2SignerType"))
196+
}
197+
}
198+
199+
private def _getConfigurationName() : String = {
171200
if (configurationName != "") {
172201
return configurationName
173202
} else {
174203
return globalVariables.DEFAULT_SERVICE_NAME
175204
}
176205
}
177206

207+
private def _validate_credentials(credentials : HashMap[String, String], cosType : String, authMethod : String) = {
208+
val requiredKeys = _get_required_key_array(cosType, authMethod)
209+
210+
// check the existence of all required values in credentials
211+
for ( key <- requiredKeys ) {
212+
if (!credentials.contains(key)) {
213+
throw new IllegalArgumentException("Invalid input: missing required input [" + key + "]")
214+
}
215+
}
216+
}
217+
218+
private def _get_required_key_array(cosType : String, authMethod : String) : Array[String] = {
219+
val requiredKeySoftlayerCos = Array("endPoint", "accessKey", "secretKey")
220+
val requiredKeyListIamApiKey = Array("endPoint", "apiKey", "serviceId")
221+
val requiredKeyListIamToken = Array("endPoint", "iamToken", "serviceId")
222+
223+
if (cosType == "bluemix_cos") {
224+
if (authMethod == "api_key") {
225+
return requiredKeyListIamApiKey
226+
} else if (authMethod == "iam_token") {
227+
return requiredKeyListIamToken
228+
} else {
229+
throw new IllegalArgumentException("Invalid input: authMethod. authMethod is optional but if set, it should have one of the following values: api_key, iam_token")
230+
}
231+
} else if (cosType == "softlayer_cos") {
232+
return requiredKeySoftlayerCos
233+
} else {
234+
throw new IllegalArgumentException("Invalid input: cosType. cosType is optional but if set, it should have one of the following values: softlayer_cos, bluemix_cos")
235+
}
236+
}
237+
178238
def url(bucketName: String, objectName: String) : String = {
179-
var serviceName = getConfigurationName()
239+
var serviceName = _getConfigurationName()
180240
return "cos://" + bucketName + "." + serviceName + "/" + objectName
181241
}
182242
}

0 commit comments

Comments
 (0)