Skip to content

Commit 8f56734

Browse files
author
Robert Kruszewski
committed
Merge branch 'master' into rk/more-upstreamn
2 parents 1c56118 + 594ac4f commit 8f56734

File tree

592 files changed

+25764
-5245
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

592 files changed

+25764
-5245
lines changed

LICENSE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,7 @@ The text of each license is also included at licenses/LICENSE-[project].txt.
237237

238238
(BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
239239
(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
240+
(BSD 3 Clause) jmock (org.jmock:jmock-junit4:2.8.4 - http://jmock.org/)
240241
(BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
241242
(BSD License) ANTLR 4.5.2-1 (org.antlr:antlr4:4.5.2-1 - http://wwww.antlr.org/)
242243
(BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)

R/pkg/NAMESPACE

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,10 +201,17 @@ exportMethods("%<=>%",
201201
"approxCountDistinct",
202202
"approxQuantile",
203203
"array_contains",
204+
"array_join",
204205
"array_max",
205206
"array_min",
206207
"array_position",
208+
<<<<<<< HEAD
207209
"array_sort",
210+
=======
211+
"array_repeat",
212+
"array_sort",
213+
"arrays_overlap",
214+
>>>>>>> master
208215
"asc",
209216
"ascii",
210217
"asin",
@@ -302,6 +309,7 @@ exportMethods("%<=>%",
302309
"lower",
303310
"lpad",
304311
"ltrim",
312+
"map_entries",
305313
"map_keys",
306314
"map_values",
307315
"max",

R/pkg/R/DataFrame.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2297,6 +2297,8 @@ setMethod("rename",
22972297

22982298
setClassUnion("characterOrColumn", c("character", "Column"))
22992299

2300+
setClassUnion("numericOrColumn", c("numeric", "Column"))
2301+
23002302
#' Arrange Rows by Variables
23012303
#'
23022304
#' Sort a SparkDataFrame by the specified column(s).

R/pkg/R/client.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,11 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack
6363
checkJavaVersion <- function() {
6464
javaBin <- "java"
6565
javaHome <- Sys.getenv("JAVA_HOME")
66+
<<<<<<< HEAD
6667
javaReqs <- utils::packageDescription(utils::packageName(), fields=c("SystemRequirements"))
68+
=======
69+
javaReqs <- utils::packageDescription(utils::packageName(), fields = c("SystemRequirements"))
70+
>>>>>>> master
6771
sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L))
6872
if (javaHome != "") {
6973
javaBin <- file.path(javaHome, "bin", javaBin)
@@ -90,7 +94,12 @@ checkJavaVersion <- function() {
9094
# Extract 8 from it to compare to sparkJavaVersion
9195
javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2])
9296
if (javaVersionNum != sparkJavaVersion) {
97+
<<<<<<< HEAD
9398
stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:", javaVersionStr))
99+
=======
100+
stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:",
101+
javaVersionStr))
102+
>>>>>>> master
94103
}
95104
}
96105

R/pkg/R/functions.R

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,7 @@ NULL
189189
#' the map or array of maps.
190190
#' \item \code{from_json}: it is the column containing the JSON string.
191191
#' }
192+
#' @param y Column to compute on.
192193
#' @param value A value to compute on.
193194
#' \itemize{
194195
#' \item \code{array_contains}: a value to be checked if contained in the column.
@@ -207,7 +208,11 @@ NULL
207208
#' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
208209
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
209210
#' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
211+
<<<<<<< HEAD
210212
#' head(select(tmp, array_position(tmp$v1, 21), array_sort(tmp$v1)))
213+
=======
214+
#' head(select(tmp, array_position(tmp$v1, 21), array_repeat(df$mpg, 3), array_sort(tmp$v1)))
215+
>>>>>>> master
211216
#' head(select(tmp, flatten(tmp$v1), reverse(tmp$v1)))
212217
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
213218
#' head(tmp2)
@@ -216,12 +221,22 @@ NULL
216221
#' head(select(tmp, sort_array(tmp$v1)))
217222
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))
218223
#' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
224+
<<<<<<< HEAD
219225
#' head(select(tmp3, map_keys(tmp3$v3)))
220226
#' head(select(tmp3, map_values(tmp3$v3)))
221227
#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
222228
#' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$hp))
223229
#' head(select(tmp4, concat(tmp4$v4, tmp4$v5)))
224230
#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))}
231+
=======
232+
#' head(select(tmp3, map_entries(tmp3$v3), map_keys(tmp3$v3), map_values(tmp3$v3)))
233+
#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
234+
#' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$cyl, df$hp))
235+
#' head(select(tmp4, concat(tmp4$v4, tmp4$v5), arrays_overlap(tmp4$v4, tmp4$v5)))
236+
#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))
237+
#' tmp5 <- mutate(df, v6 = create_array(df$model, df$model))
238+
#' head(select(tmp5, array_join(tmp5$v6, "#"), array_join(tmp5$v6, "#", "NULL")))}
239+
>>>>>>> master
225240
NULL
226241

227242
#' Window functions for Column operations
@@ -3006,6 +3021,27 @@ setMethod("array_contains",
30063021
column(jc)
30073022
})
30083023

3024+
#' @details
3025+
#' \code{array_join}: Concatenates the elements of column using the delimiter.
3026+
#' Null values are replaced with nullReplacement if set, otherwise they are ignored.
3027+
#'
3028+
#' @param delimiter a character string that is used to concatenate the elements of column.
3029+
#' @param nullReplacement an optional character string that is used to replace the Null values.
3030+
#' @rdname column_collection_functions
3031+
#' @aliases array_join array_join,Column-method
3032+
#' @note array_join since 2.4.0
3033+
setMethod("array_join",
3034+
signature(x = "Column", delimiter = "character"),
3035+
function(x, delimiter, nullReplacement = NULL) {
3036+
jc <- if (is.null(nullReplacement)) {
3037+
callJStatic("org.apache.spark.sql.functions", "array_join", x@jc, delimiter)
3038+
} else {
3039+
callJStatic("org.apache.spark.sql.functions", "array_join", x@jc, delimiter,
3040+
as.character(nullReplacement))
3041+
}
3042+
column(jc)
3043+
})
3044+
30093045
#' @details
30103046
#' \code{array_max}: Returns the maximum value of the array.
30113047
#'
@@ -3049,6 +3085,29 @@ setMethod("array_position",
30493085
})
30503086

30513087
#' @details
3088+
<<<<<<< HEAD
3089+
=======
3090+
#' \code{array_repeat}: Creates an array containing \code{x} repeated the number of times
3091+
#' given by \code{count}.
3092+
#'
3093+
#' @param count a Column or constant determining the number of repetitions.
3094+
#' @rdname column_collection_functions
3095+
#' @aliases array_repeat array_repeat,Column,numericOrColumn-method
3096+
#' @note array_repeat since 2.4.0
3097+
setMethod("array_repeat",
3098+
signature(x = "Column", count = "numericOrColumn"),
3099+
function(x, count) {
3100+
if (class(count) == "Column") {
3101+
count <- count@jc
3102+
} else {
3103+
count <- as.integer(count)
3104+
}
3105+
jc <- callJStatic("org.apache.spark.sql.functions", "array_repeat", x@jc, count)
3106+
column(jc)
3107+
})
3108+
3109+
#' @details
3110+
>>>>>>> master
30523111
#' \code{array_sort}: Sorts the input array in ascending order. The elements of the input array
30533112
#' must be orderable. NA elements will be placed at the end of the returned array.
30543113
#'
@@ -3063,6 +3122,24 @@ setMethod("array_sort",
30633122
})
30643123

30653124
#' @details
3125+
<<<<<<< HEAD
3126+
=======
3127+
#' \code{arrays_overlap}: Returns true if the input arrays have at least one non-null element in
3128+
#' common. If not and both arrays are non-empty and any of them contains a null, it returns null.
3129+
#' It returns false otherwise.
3130+
#'
3131+
#' @rdname column_collection_functions
3132+
#' @aliases arrays_overlap arrays_overlap,Column-method
3133+
#' @note arrays_overlap since 2.4.0
3134+
setMethod("arrays_overlap",
3135+
signature(x = "Column", y = "Column"),
3136+
function(x, y) {
3137+
jc <- callJStatic("org.apache.spark.sql.functions", "arrays_overlap", x@jc, y@jc)
3138+
column(jc)
3139+
})
3140+
3141+
#' @details
3142+
>>>>>>> master
30663143
#' \code{flatten}: Creates a single array from an array of arrays.
30673144
#' If a structure of nested arrays is deeper than two levels, only one level of nesting is removed.
30683145
#'
@@ -3076,6 +3153,19 @@ setMethod("flatten",
30763153
column(jc)
30773154
})
30783155

3156+
#' @details
3157+
#' \code{map_entries}: Returns an unordered array of all entries in the given map.
3158+
#'
3159+
#' @rdname column_collection_functions
3160+
#' @aliases map_entries map_entries,Column-method
3161+
#' @note map_entries since 2.4.0
3162+
setMethod("map_entries",
3163+
signature(x = "Column"),
3164+
function(x) {
3165+
jc <- callJStatic("org.apache.spark.sql.functions", "map_entries", x@jc)
3166+
column(jc)
3167+
})
3168+
30793169
#' @details
30803170
#' \code{map_keys}: Returns an unordered array containing the keys of the map.
30813171
#'
@@ -3149,8 +3239,13 @@ setMethod("size",
31493239
#' (or starting from the end if start is negative) with the specified length.
31503240
#'
31513241
#' @rdname column_collection_functions
3242+
<<<<<<< HEAD
31523243
#' @param start an index indicating the first element occuring in the result.
31533244
#' @param length a number of consecutive elements choosen to the result.
3245+
=======
3246+
#' @param start an index indicating the first element occurring in the result.
3247+
#' @param length a number of consecutive elements chosen to the result.
3248+
>>>>>>> master
31543249
#' @aliases slice slice,Column-method
31553250
#' @note slice since 2.4.0
31563251
setMethod("slice",

R/pkg/R/generics.R

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,7 @@ setGeneric("summarize", function(x, ...) { standardGeneric("summarize") })
624624
#' @rdname summary
625625
setGeneric("summary", function(object, ...) { standardGeneric("summary") })
626626

627-
setGeneric("toJSON", function(x) { standardGeneric("toJSON") })
627+
setGeneric("toJSON", function(x, ...) { standardGeneric("toJSON") })
628628

629629
setGeneric("toRDD", function(x) { standardGeneric("toRDD") })
630630

@@ -757,6 +757,10 @@ setGeneric("approxCountDistinct", function(x, ...) { standardGeneric("approxCoun
757757
#' @name NULL
758758
setGeneric("array_contains", function(x, value) { standardGeneric("array_contains") })
759759

760+
#' @rdname column_collection_functions
761+
#' @name NULL
762+
setGeneric("array_join", function(x, delimiter, ...) { standardGeneric("array_join") })
763+
760764
#' @rdname column_collection_functions
761765
#' @name NULL
762766
setGeneric("array_max", function(x) { standardGeneric("array_max") })
@@ -769,10 +773,23 @@ setGeneric("array_min", function(x) { standardGeneric("array_min") })
769773
#' @name NULL
770774
setGeneric("array_position", function(x, value) { standardGeneric("array_position") })
771775

776+
#' @rdname column_collection_functions
777+
#' @name NULL
778+
<<<<<<< HEAD
779+
setGeneric("array_sort", function(x) { standardGeneric("array_sort") })
780+
781+
=======
782+
setGeneric("array_repeat", function(x, count) { standardGeneric("array_repeat") })
783+
772784
#' @rdname column_collection_functions
773785
#' @name NULL
774786
setGeneric("array_sort", function(x) { standardGeneric("array_sort") })
775787

788+
#' @rdname column_collection_functions
789+
#' @name NULL
790+
setGeneric("arrays_overlap", function(x, y) { standardGeneric("arrays_overlap") })
791+
792+
>>>>>>> master
776793
#' @rdname column_string_functions
777794
#' @name NULL
778795
setGeneric("ascii", function(x) { standardGeneric("ascii") })
@@ -1034,6 +1051,10 @@ setGeneric("lpad", function(x, len, pad) { standardGeneric("lpad") })
10341051
#' @name NULL
10351052
setGeneric("ltrim", function(x, trimString) { standardGeneric("ltrim") })
10361053

1054+
#' @rdname column_collection_functions
1055+
#' @name NULL
1056+
setGeneric("map_entries", function(x) { standardGeneric("map_entries") })
1057+
10371058
#' @rdname column_collection_functions
10381059
#' @name NULL
10391060
setGeneric("map_keys", function(x) { standardGeneric("map_keys") })

R/pkg/R/sparkR.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ sparkR.sparkContext <- function(
194194

195195
# Don't use readString() so that we can provide a useful
196196
# error message if the R and Java versions are mismatched.
197-
authSecretLen = readInt(f)
197+
authSecretLen <- readInt(f)
198198
if (length(authSecretLen) == 0 || authSecretLen == 0) {
199199
stop("Unexpected EOF in JVM connection data. Mismatched versions?")
200200
}

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1503,6 +1503,39 @@ test_that("column functions", {
15031503
result <- collect(select(df2, reverse(df2[[1]])))[[1]]
15041504
expect_equal(result, "cba")
15051505

1506+
<<<<<<< HEAD
1507+
=======
1508+
# Test array_repeat()
1509+
df <- createDataFrame(list(list("a", 3L), list("b", 2L)))
1510+
result <- collect(select(df, array_repeat(df[[1]], df[[2]])))[[1]]
1511+
expect_equal(result, list(list("a", "a", "a"), list("b", "b")))
1512+
1513+
result <- collect(select(df, array_repeat(df[[1]], 2L)))[[1]]
1514+
expect_equal(result, list(list("a", "a"), list("b", "b")))
1515+
1516+
# Test arrays_overlap()
1517+
df <- createDataFrame(list(list(list(1L, 2L), list(3L, 1L)),
1518+
list(list(1L, 2L), list(3L, 4L)),
1519+
list(list(1L, NA), list(3L, 4L))))
1520+
result <- collect(select(df, arrays_overlap(df[[1]], df[[2]])))[[1]]
1521+
expect_equal(result, c(TRUE, FALSE, NA))
1522+
1523+
# Test array_join()
1524+
df <- createDataFrame(list(list(list("Hello", "World!"))))
1525+
result <- collect(select(df, array_join(df[[1]], "#")))[[1]]
1526+
expect_equal(result, "Hello#World!")
1527+
df2 <- createDataFrame(list(list(list("Hello", NA, "World!"))))
1528+
result <- collect(select(df2, array_join(df2[[1]], "#", "Beautiful")))[[1]]
1529+
expect_equal(result, "Hello#Beautiful#World!")
1530+
result <- collect(select(df2, array_join(df2[[1]], "#")))[[1]]
1531+
expect_equal(result, "Hello#World!")
1532+
df3 <- createDataFrame(list(list(list("Hello", NULL, "World!"))))
1533+
result <- collect(select(df3, array_join(df3[[1]], "#", "Beautiful")))[[1]]
1534+
expect_equal(result, "Hello#Beautiful#World!")
1535+
result <- collect(select(df3, array_join(df3[[1]], "#")))[[1]]
1536+
expect_equal(result, "Hello#World!")
1537+
1538+
>>>>>>> master
15061539
# Test array_sort() and sort_array()
15071540
df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))
15081541

@@ -1518,21 +1551,36 @@ test_that("column functions", {
15181551
df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(4L, 5L))))
15191552
result <- collect(select(df, slice(df[[1]], 2L, 2L)))[[1]]
15201553
expect_equal(result, list(list(2L, 3L), list(5L)))
1554+
<<<<<<< HEAD
15211555

15221556
# Test concat()
15231557
df <- createDataFrame(list(list(list(1L, 2L, 3L), list(4L, 5L, 6L)),
15241558
list(list(7L, 8L, 9L), list(10L, 11L, 12L))))
15251559
result <- collect(select(df, concat(df[[1]], df[[2]])))[[1]]
15261560
expect_equal(result, list(list(1L, 2L, 3L, 4L, 5L, 6L), list(7L, 8L, 9L, 10L, 11L, 12L)))
15271561

1562+
=======
1563+
1564+
# Test concat()
1565+
df <- createDataFrame(list(list(list(1L, 2L, 3L), list(4L, 5L, 6L)),
1566+
list(list(7L, 8L, 9L), list(10L, 11L, 12L))))
1567+
result <- collect(select(df, concat(df[[1]], df[[2]])))[[1]]
1568+
expect_equal(result, list(list(1L, 2L, 3L, 4L, 5L, 6L), list(7L, 8L, 9L, 10L, 11L, 12L)))
1569+
1570+
>>>>>>> master
15281571
# Test flatten()
15291572
df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
15301573
list(list(list(5L, 6L), list(7L, 8L)))))
15311574
result <- collect(select(df, flatten(df[[1]])))[[1]]
15321575
expect_equal(result, list(list(1L, 2L, 3L, 4L), list(5L, 6L, 7L, 8L)))
15331576

1534-
# Test map_keys(), map_values() and element_at()
1577+
# Test map_entries(), map_keys(), map_values() and element_at()
15351578
df <- createDataFrame(list(list(map = as.environment(list(x = 1, y = 2)))))
1579+
result <- collect(select(df, map_entries(df$map)))[[1]]
1580+
expected_entries <- list(listToStruct(list(key = "x", value = 1)),
1581+
listToStruct(list(key = "y", value = 2)))
1582+
expect_equal(result, list(expected_entries))
1583+
15361584
result <- collect(select(df, map_keys(df$map)))[[1]]
15371585
expect_equal(result, list(list("x", "y")))
15381586

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,8 @@ can be run using:
9999
Please see the guidance on how to
100100
[run tests for a module, or individual tests](http://spark.apache.org/developer-tools.html#individual-tests).
101101

102+
There is also a Kubernetes integration test, see resource-managers/kubernetes/integration-tests/README.md
103+
102104
## A Note About Hadoop Versions
103105

104106
Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported

0 commit comments

Comments (0)