
Commit 54ae7d6

Authored by G Adam Cox
Merge pull request #8 from ibm-cds-labs/adding_r_scala_platform
Create sparklyr based R package
2 parents 0c9d6ed + 6fcd8cc commit 54ae7d6

16 files changed, +500 -9 lines

r/sparklyr/DESCRIPTION

Lines changed: 13 additions & 0 deletions (new file)

    Package: ibmos2sparklyr
    Title: Loads Object Store data from Softlayer and Bluemix
    Version: 0.0.7
    Authors@R: person("Jim", "Crozier", email = "[email protected]",
        role = c("aut", "cre"))
    Description: Loads data from Object Store in Softlayer and Bluemix with a Spark context built on sparklyr.
    Depends:
        R (>= 3.1.0)
    License: Apache
    LazyData: true
    RoxygenNote: 5.0.1
    Imports:
        sparklyr
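Since sparklyr appears under `Imports:` rather than `Depends:`, attaching ibmos2sparklyr does not put sparklyr on the user's search path. A minimal sketch of the practical effect, assuming both packages are installed:

    library(ibmos2sparklyr)   # attaches only ibmos2sparklyr
    # sparklyr is loaded for the package's internal calls, but user code
    # reaches sparklyr functions explicitly via the :: operator
    sc <- sparklyr::spark_connect(master = "local")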
File renamed without changes.

r/sparklyr/NAMESPACE

Lines changed: 1 addition & 0 deletions (new file)

    exportPattern("^[[:alpha:]]+")
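This directive exports every object whose name begins with a letter, which covers both reference-class generators defined in osconfig.R below. A quick sketch of the pattern's behavior (the dot-prefixed name is hypothetical):

    # Names starting with a letter match; dot-prefixed internals do not.
    grepl("^[[:alpha:]]+", c("softlayer", "bluemix", ".internal_helper"))
    #> [1]  TRUE  TRUE FALSE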

r/sparklyr/R/osconfig.R

Lines changed: 139 additions & 0 deletions (new file)

    setOldClass("jobj")

    swifturl = function(name, container_name, object_name){
      return(paste0('swift2d://', container_name, '.', name, '/', object_name))
    }

    #' sparkcontext is a SparkContext object.
    #'
    #' name is a string that identifies this configuration. You can
    #' use any string you like. This allows you to create
    #' multiple configurations for different Object Storage accounts.
    #' auth_url, username and password are string credentials for your
    #' Softlayer Object Store.
    #' @export softlayer
    #' @exportClass softlayer
    softlayer <- setRefClass("softlayer",
        fields=list(name="character", container_name="character", object_name="character",
                    sparkcontext='jobj', auth_url="character",
                    tenant="character", username="character", password="character"),
        methods=list(
          initialize = function(sparkcontext, name, auth_url, tenant, username, password,
                                public=FALSE,
                                swift2d_driver='com.ibm.stocator.fs.ObjectStoreFileSystem'){

            .self$name = name

            # get the underlying SparkContext from the sparklyr connection
            ctx <- spark_context(sparkcontext)

            # wrap it in a JavaSparkContext to reach the Hadoop configuration
            jsc <- invoke_static(
              sparkcontext,
              "org.apache.spark.api.java.JavaSparkContext",
              "fromSparkContext",
              ctx
            )

            # set the swift configs:
            prefix = paste("fs.swift2d.service", name, sep=".")
            hconf = jsc %>% invoke("hadoopConfiguration")
            hconf %>% invoke("set", "fs.swift2d.impl", swift2d_driver)
            hconf %>% invoke("set", paste(prefix, "auth.url", sep='.'), auth_url)
            hconf %>% invoke("set", paste(prefix, "username", sep='.'), username)
            hconf %>% invoke("set", paste(prefix, "tenant", sep='.'), tenant)
            hconf %>% invoke("set", paste(prefix, "auth.endpoint.prefix", sep='.'), "endpoints")
            hconf %>% invoke("set", paste(prefix, "auth.method", sep='.'), "swiftauth")
            hconf %>% invoke("set", paste(prefix, "http.port", sep='.'), "8080")
            hconf %>% invoke("set", paste(prefix, "apikey", sep='.'), password)
            invisible(hconf %>% invoke("setBoolean", paste(prefix, "public", sep='.'), public))
            hconf %>% invoke("set", paste(prefix, "use.get.auth", sep='.'), "true")
            invisible(hconf %>% invoke("setBoolean", paste(prefix, "location-aware", sep='.'), FALSE))
            hconf %>% invoke("set", paste(prefix, "password", sep='.'), password)
          },

          url = function(container_name, object_name){
            return(swifturl(name, container_name, object_name))
          }
        )
    )

    #' sparkcontext: a SparkContext object.
    #'
    #' credentials: a list with the following required keys:
    #'
    #'   auth_url
    #'   project_id (or projectId)
    #'   user_id (or userId)
    #'   password
    #'   region
    #'
    #' and optional key:
    #'   name  #[to be deprecated] The name of the configuration.
    #'
    #' name: string that identifies this configuration. You can
    #' use any string you like. This allows you to create
    #' multiple configurations for different Object Storage accounts.
    #' This is not required at the moment, since credentials['name']
    #' is still supported.
    #'
    #' When using this from an IBM Spark service instance that
    #' is configured to connect to particular Bluemix object store
    #' instances, the values for these credentials can be obtained
    #' by clicking on the 'insert to code' link just below a data
    #' source.
    #' @export bluemix
    #' @exportClass bluemix
    bluemix <- setRefClass("bluemix",
        fields=list(name="character", credentials="list",
                    sparkcontext='jobj', public="character"),
        methods=list(
          initialize = function(..., sparkcontext, name=NULL, credentials,
                                public=FALSE,
                                swift2d_driver='com.ibm.stocator.fs.ObjectStoreFileSystem'){

            callSuper(..., credentials=credentials)

            if (is.null(name)) name <<- credentials["name"][[1]]

            # accept either snake_case or camelCase credential keys;
            # [[ returns NULL (rather than an error) for a missing key
            user_id = credentials[['user_id']]
            if (is.null(user_id)) user_id = credentials[['userId']]

            tenant = credentials[['project_id']]
            if (is.null(tenant)) tenant = credentials[['projectId']]

            .self$name = name

            # get the underlying SparkContext from the sparklyr connection
            ctx <- spark_context(sparkcontext)

            # wrap it in a JavaSparkContext to reach the Hadoop configuration
            jsc <- invoke_static(
              sparkcontext,
              "org.apache.spark.api.java.JavaSparkContext",
              "fromSparkContext",
              ctx
            )

            # set the swift configs:
            prefix = paste("fs.swift2d.service", name, sep=".")
            hconf = jsc %>% invoke("hadoopConfiguration")
            hconf %>% invoke("set", "fs.swift2d.impl", swift2d_driver)
            hconf %>% invoke("set", paste(prefix, "auth.url", sep='.'),
                             paste(credentials[['auth_url']], "/v3/auth/tokens", sep=""))
            hconf %>% invoke("set", paste(prefix, "auth.endpoint.prefix", sep='.'), "endpoints")
            hconf %>% invoke("set", paste(prefix, "tenant", sep='.'), tenant)
            hconf %>% invoke("set", paste(prefix, "username", sep='.'), user_id)
            hconf %>% invoke("set", paste(prefix, "password", sep='.'), credentials[['password']])
            hconf %>% invoke("set", paste(prefix, "auth.method", sep='.'), "keystoneV3")
            hconf %>% invoke("set", paste(prefix, "region", sep='.'), credentials[['region']])
            invisible(hconf %>% invoke("setBoolean", paste(prefix, "public", sep='.'), public))
            #invisible(SparkR:::callJMethod(hConf, "setInt", paste(prefix, "http.port", sep='.'), 8080))
          },

          url = function(container_name, object_name){
            return(swifturl(name, container_name, object_name))
          }
        )
    )
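After constructing one of these configurations, the same invoke chain used above can read properties back from the Hadoop configuration as a sanity check. A minimal sketch, assuming a sparklyr connection `sc` and a bluemix configuration created under the illustrative name "bluemixOScon":

    library(sparklyr)

    ctx <- spark_context(sc)
    jsc <- invoke_static(sc, "org.apache.spark.api.java.JavaSparkContext",
                         "fromSparkContext", ctx)
    hconf <- jsc %>% invoke("hadoopConfiguration")

    # The driver class is set once, globally:
    hconf %>% invoke("get", "fs.swift2d.impl")
    #> "com.ibm.stocator.fs.ObjectStoreFileSystem"

    # Per-configuration keys live under the service prefix:
    hconf %>% invoke("get", "fs.swift2d.service.bluemixOScon.auth.method")
    #> "keystoneV3"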

r/sparklyr/README.md

Lines changed: 89 additions & 0 deletions (new file)

    # ibmos2sparklyr

    The package sets Spark Hadoop configurations for connecting to
    IBM Bluemix Object Storage and Softlayer Account Object Storage instances
    with the swift protocol. This package uses the new
    [swift2d/stocator](https://github.com/SparkTC/stocator) protocol, available
    on the latest IBM Spark Service instances and through IBM Data Science Experience (DSX).

    This package expects a SparkContext instantiated by sparklyr. It has been tested
    to work with IBM RStudio from DSX, though it should work with other Spark
    installations that utilize the [swift2d/stocator](https://github.com/SparkTC/stocator) driver.

    ## Installation

        library(devtools)
        devtools::install_url("https://github.com/ibm-cds-labs/ibmos2spark/archive/<version>.zip", subdir="r/sparklyr/", dependencies=FALSE)

    where `version` should be a tagged release, such as `0.0.7`. (If you're daring, you can use `master`.)

    ###### WARNING

    In IBM Data Science Experience, be sure to include the `dependencies = FALSE` flag when
    calling `devtools::install_url`. If you forget, you will most likely overwrite DSX's
    special flavor of sparklyr, which will break your connection to IBM Spark Services. To repair this,
    go into your local R repo store ("/home/rstudio/R/x86_64-redhat-linux-gnu-library/RVERSION/"),
    where RVERSION is the newest install of R (currently 3.3), and delete the `sparklyr` folder.
    After deleting, choose File -> Quit Session to refresh your R kernel. These steps will restore
    the special DSX version of the sparklyr package.

    ## Usage

    ### Bluemix

        library(ibmos2sparklyr)
        configurationname = "bluemixOScon" # can be any name you like (allows for multiple configurations)

        # In DSX notebooks, "insert to code" will insert this credentials list for you
        creds = list(
            auth_url="https://identity.open.softlayer.com",
            region="dallas",
            project_id="XXXXX",
            user_id="XXXXX",
            password="XXXXX")

        bmconfig = bluemix(sparkcontext=sc, name=configurationname, credentials=creds)

        container = "my_container"          # name of your object store container
        object = "my_data.csv"              # name of the object to retrieve from the container
        spark_object_name = "dataFromSwift" # name to assign to the new Spark object

        data = sparklyr::spark_read_csv(sc, spark_object_name, bmconfig$url(container, object))

    ### Softlayer

        library(ibmos2sparklyr)
        configurationname = "softlayerOScon" # can be any name you like (allows for multiple configurations)

        slconfig = softlayer(sparkcontext=sc,
                             name=configurationname,
                             auth_url="https://identity.open.softlayer.com",
                             tenant="XXXXX",
                             username="XXXXX",
                             password="XXXXX"
        )

        container = "my_container"          # name of your object store container
        object = "my_data.csv"              # name of the object to retrieve from the container
        spark_object_name = "dataFromSwift" # name to assign to the new Spark object

        data = sparklyr::spark_read_csv(sc, spark_object_name, slconfig$url(container, object))

    ## License

    Copyright 2016 IBM Cloud Data Services

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
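For reference, the URL that `bmconfig$url(container, object)` (or `slconfig$url(...)`) hands to `spark_read_csv` is built by the `swifturl` helper in osconfig.R. A minimal illustration using the placeholder names from the examples above:

    swifturl <- function(name, container_name, object_name){
      paste0('swift2d://', container_name, '.', name, '/', object_name)
    }
    swifturl("bluemixOScon", "my_container", "my_data.csv")
    #> [1] "swift2d://my_container.bluemixOScon/my_data.csv"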

r/sparklyr/man/bluemix-class.Rd

Lines changed: 30 additions & 0 deletions
Some generated files are not rendered by default.

r/sparklyr/man/softlayer-class.Rd

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default.

r/DESCRIPTION renamed to r/sparkr/DESCRIPTION

Lines changed: 1 addition & 1 deletion

    @@ -1,4 +1,4 @@
    -Package: ibmos2spark
    +Package: ibmos2sparkR
     Title: Loads Object Store data from Softlayer and Bluemix
     Version: 0.0.7
     Authors@R: person("Jim", "Crozier", email = "[email protected]",
