Skip to content

Commit 1daf367

Browse files
Merge pull request #31 from ibm-watson-data-lab/bluemix_cos_support_r
Bluemix Cloud Object Storage Support [R]
2 parents 3475774 + 71924e8 commit 1daf367

File tree

2 files changed

+136
-27
lines changed

2 files changed

+136
-27
lines changed

r/sparkr/R/osconfig.R

Lines changed: 83 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -115,46 +115,74 @@ bluemix <- setRefClass("bluemix",
115115
)
116116

117117
#' CloudObjectStorage is a class that is designed for IBM cloud object storage (COS)
118-
#' It sets up the hadoop config for COS and provide the final file url.
118+
#' It sets up the hadoop config for COS and provide the final file url. It also supports
119+
#' connecting to an IBM COS instance that is being hosted on bluemix.
119120
#'
120121
#' sparkContext: a SparkContext object.
121122
#''
122-
#' credentials: a dictionary with the following required keys:
123-
#' endpoint
124-
#' accessKey
125-
#' secretKey
123+
#' credentials: a dictionary with the required keys to connect to an IBM cloud object storage.
124+
#' The required keys differ according to the type of COS.
125+
#' - for COS type "softlayer_cos" the following keys are required:
126+
#' endpoint [required]
127+
#' accessKey [required]
128+
#' secretKey [required]
129+
#' - for COS type "bluemix_cos", here are the required/optional keys:
130+
#' endpoint [required]
131+
#' serviceId [required]
132+
#' apiKey OR iamToken, depending on the selected authorization method (authMethod) [required]
133+
#' iamServiceEndpoint [optional] (default: https://iam.ng.bluemix.net/oidc/token)
134+
#' v2SignerType [optional]
135+
#'
136+
#' configurationName: string that identifies the configuration to be set.
137+
#'
126138
#'
127-
#' configurationName: string identifies the configurations that has been
128-
#' set.
139+
#' cosType [optional]: string that identifies the type of COS to connect to. The supported types of COS
140+
#' are "softlayer_cos" and "bluemix_cos". "softlayer_cos" will be chosen as default if no cosType is passed.
141+
#'
142+
#' authMethod [optional]: string that identifies the type of authorization method to use when connecting to COS. This parameter
143+
#' is not required for softlayer_cos and is only needed for bluemix_cos. Two options can be chosen for this parameter:
144+
#' "api_key" or "iam_token". "api_key" will be chosen as default if the value is not set.
129145
#' @export CloudObjectStorage
130146
#' @exportClass CloudObjectStorage
131147
CloudObjectStorage <- setRefClass("CloudObjectStorage",
132-
fields=list(configName="character"),
148+
fields=list(configName="character", cosType="character", authMethod="character"),
133149
methods=list(
134-
initialize = function(..., sparkContext, credentials, configurationName="") {
135-
150+
initialize = function(..., sparkContext, credentials, configurationName="",
151+
cosType="softlayer_cos", authMethod="api_key") {
136152

137-
if (is.null(credentials["endpoint"][[1]])) {
138-
stop("Attribute endpoint in credentials is missing!")
139-
}
140-
141-
if (is.null(credentials["accessKey"][[1]])) {
142-
stop("Attribute accessKey in credentials is missing!")
143-
}
144-
145-
if (is.null(credentials["secretKey"][[1]])) {
146-
stop("Attribute secretKey in credentials is missing!")
147-
}
153+
# validate input
154+
validateInput(credentials, cosType, authMethod)
148155

149156
# bind config name
150157
.self$configName = configurationName
151158

152-
# set prefix for hadoop config
159+
# set up hadoop config
153160
prefix = paste("fs.cos", getConfigName(), sep='.')
154161
hConf = SparkR:::callJMethod(sparkContext, "hadoopConfiguration")
162+
155163
SparkR:::callJMethod(hConf, "set", paste(prefix, "endpoint", sep='.'), credentials['endpoint'][[1]])
156-
SparkR:::callJMethod(hConf, "set", paste(prefix, "access.key", sep='.'), credentials['accessKey'][[1]])
157-
SparkR:::callJMethod(hConf, "set", paste(prefix, "secret.key", sep='.'), credentials['secretKey'][[1]])
164+
165+
if (cosType == "softlayer_cos") {
166+
# softlayer COS case
167+
SparkR:::callJMethod(hConf, "set", paste(prefix, "access.key", sep='.'), credentials['accessKey'][[1]])
168+
SparkR:::callJMethod(hConf, "set", paste(prefix, "secret.key", sep='.'), credentials['secretKey'][[1]])
169+
} else if (cosType == "bluemix_cos") {
170+
# bluemix COS case
171+
SparkR:::callJMethod(hConf, "set", paste(prefix, "iam.service.id", sep='.'), credentials['serviceId'][[1]])
172+
if (authMethod == "api_key") {
173+
SparkR:::callJMethod(hConf, "set", paste(prefix, "iam.api.key", sep='.'), credentials['apiKey'][[1]])
174+
} else if (authMethod == "iam_token") {
175+
SparkR:::callJMethod(hConf, "set", paste(prefix, "iam.token", sep='.'), credentials['iamToken'][[1]])
176+
}
177+
178+
if ("iamServiceEndpoint" %in% names(credentials)) {
179+
SparkR:::callJMethod(hConf, "set", paste(prefix, "iam.endpoint", sep='.'), credentials['iamServiceEndpoint'][[1]])
180+
}
181+
182+
if ("v2SignerType" %in% names(credentials)) {
183+
SparkR:::callJMethod(hConf, "set", paste(prefix, "v2.signer.type", sep='.'), credentials['v2SignerType'][[1]])
184+
}
185+
}
158186
},
159187

160188
getConfigName = function() {
@@ -164,6 +192,37 @@ CloudObjectStorage <- setRefClass("CloudObjectStorage",
164192
return ("service")
165193
},
166194

195+
validateInput = function (credentials, cosType, authMethod) {
196+
requiredKeys = get_required_key_array(cosType, authMethod)
197+
198+
# check the existence of all required values in credentials
199+
for (key in requiredKeys) {
200+
if (!key %in% names(credentials)) {
201+
stop(paste("Invalid input: missing required input [", key, "]!", sep=''))
202+
}
203+
}
204+
},
205+
206+
get_required_key_array = function (cosType, authMethod) {
207+
requiredKeySoftlayerCos = list("endpoint", "accessKey", "secretKey")
208+
requiredKeyListIamApiKey = list("endpoint", "apiKey", "serviceId")
209+
requiredKeyListIamToken = list("endpoint", "iamToken", "serviceId")
210+
211+
if (cosType == "bluemix_cos") {
212+
if (authMethod == "api_key") {
213+
return (requiredKeyListIamApiKey)
214+
} else if (authMethod == "iam_token") {
215+
return (requiredKeyListIamToken)
216+
} else {
217+
stop("Invalid input: authMethod. authMethod is optional but if set, it should have one of the following values: api_key, iam_token")
218+
}
219+
} else if (cosType == "softlayer_cos") {
220+
return (requiredKeySoftlayerCos)
221+
} else {
222+
stop("Invalid input: cosType. cosType is optional but if set, it should have one of the following values: softlayer_cos, bluemix_cos")
223+
}
224+
},
225+
167226
url = function(bucketName, objectName) {
168227
serviceName = getConfigName()
169228
return (paste("cos://", bucketName, ".", serviceName, "/", objectName, sep = ""))

r/sparkr/README.md

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ within a DSX Jupyter notebook, you can obtain your account credentials in the fo
3737
If your Object Storage was created with a Softlayer account, each part of the credentials will
3838
be found as text that you can copy and paste into the example code below.
3939

40-
### Cloud Object Storage
40+
### Softlayer - IBM Cloud Object Storage (COS)
4141
library(ibmos2sparkR)
4242
configurationName = "bluemixO123"
4343

@@ -60,8 +60,58 @@ be found as text that you can copy and paste into the example code below.
6060
header = "true")
6161
head(df.data.1)
6262

63+
### Bluemix - IBM Cloud Object Storage (COS)
64+
The class CloudObjectStorage allows you to connect to an IBM Cloud Object Storage (COS) instance hosted on Bluemix. You can connect to a Bluemix COS instance using an API key as follows:
6365

64-
### Bluemix / Data Science Experience
66+
library(ibmos2sparkR)
67+
configurationName = "bluemixO123"
68+
69+
# In DSX notebooks, the "insert to code" will insert this credentials list for you
70+
credentials <- list(
71+
apiKey = "XXX",
72+
serviceId = "XXX",
73+
endpoint = "https://s3-api.objectstorage.....net/"
74+
)
75+
76+
cos <- CloudObjectStorage(sparkContext=sc, credentials=credentials, configurationName=configurationName, cosType="bluemix_cos")
77+
78+
bucketName <- "bucketName"
79+
fileName <- "test.csv"
80+
url <- cos$url(bucketName, fileName)
81+
82+
invisible(sparkR.session(appName = "SparkSession R"))
83+
84+
df.data.1 <- read.df(url,
85+
source = "org.apache.spark.sql.execution.datasources.csv.CSVFileFormat",
86+
header = "true")
87+
head(df.data.1)
88+
89+
Alternatively, you can connect to an IBM Bluemix COS instance using an IAM token. Example:
90+
91+
library(ibmos2sparkR)
92+
configurationName = "bluemixO123"
93+
94+
# In DSX notebooks, the "insert to code" will insert this credentials list for you
95+
credentials <- list(
96+
iamToken = "XXXXXXXXX",
97+
serviceId = "XXX",
98+
endpoint = "https://s3-api.objectstorage.....net/"
99+
)
100+
101+
cos <- CloudObjectStorage(sparkContext=sc, credentials=credentials, configurationName=configurationName, cosType="bluemix_cos", authMethod="iam_token")
102+
103+
bucketName <- "bucketName"
104+
fileName <- "test.csv"
105+
url <- cos$url(bucketName, fileName)
106+
107+
invisible(sparkR.session(appName = "SparkSession R"))
108+
109+
df.data.1 <- read.df(url,
110+
source = "org.apache.spark.sql.execution.datasources.csv.CSVFileFormat",
111+
header = "true")
112+
head(df.data.1)
113+
114+
### Bluemix Swift Object Storage / Data Science Experience
65115

66116
library(ibmos2sparkR)
67117
configurationname = "bluemixOScon" #can be any name you like (allows for multiple configurations)
@@ -86,7 +136,7 @@ be found as text that you can copy and paste into the example code below.
86136
data = read.df(bmconfig$url(container, objectname), source="com.databricks.spark.csv", header="true")
87137

88138

89-
### Softlayer
139+
### Softlayer Swift Object Storage
90140

91141
library(ibmos2sparkR)
92142
configurationname = "softlayerOScon" #can be any name you like (allows for multiple configurations)

0 commit comments

Comments
 (0)