Skip to content

Commit a11ec23

Browse files
huaxingao authored and Robert Kruszewski committed
[SPARK-24186][R][SQL] change reverse and concat to collection functions in R
## What changes were proposed in this pull request?

`reverse` and `concat` are already in functions.R as column string functions. Since these two functions are now categorized as collection functions in Scala and Python, we will do the same in R.

## How was this patch tested?

Added tests in test_sparkSQL.R.

Author: Huaxin Gao <[email protected]>

Closes apache#21307 from huaxingao/spark_24186.
1 parent 1b47566 commit a11ec23

File tree

3 files changed

+35
-21
lines changed

3 files changed

+35
-21
lines changed

R/pkg/R/functions.R

Lines changed: 18 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,7 @@ NULL
208208
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
209209
#' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1)))
210210
#' head(select(tmp, array_position(tmp$v1, 21), array_sort(tmp$v1)))
211-
#' head(select(tmp, flatten(tmp$v1)))
211+
#' head(select(tmp, flatten(tmp$v1), reverse(tmp$v1)))
212212
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
213213
#' head(tmp2)
214214
#' head(select(tmp, posexplode(tmp$v1)))
@@ -218,7 +218,10 @@ NULL
218218
#' tmp3 <- mutate(df, v3 = create_map(df$model, df$cyl))
219219
#' head(select(tmp3, map_keys(tmp3$v3)))
220220
#' head(select(tmp3, map_values(tmp3$v3)))
221-
#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))}
221+
#' head(select(tmp3, element_at(tmp3$v3, "Valiant")))
222+
#' tmp4 <- mutate(df, v4 = create_array(df$mpg, df$cyl), v5 = create_array(df$hp))
223+
#' head(select(tmp4, concat(tmp4$v4, tmp4$v5)))
224+
#' head(select(tmp, concat(df$mpg, df$cyl, df$hp)))}
222225
NULL
223226

224227
#' Window functions for Column operations
@@ -1260,9 +1263,9 @@ setMethod("quarter",
12601263
})
12611264

12621265
#' @details
1263-
#' \code{reverse}: Reverses the string column and returns it as a new string column.
1266+
#' \code{reverse}: Returns a reversed string or an array with reverse order of elements.
12641267
#'
1265-
#' @rdname column_string_functions
1268+
#' @rdname column_collection_functions
12661269
#' @aliases reverse reverse,Column-method
12671270
#' @note reverse since 1.5.0
12681271
setMethod("reverse",
@@ -2055,20 +2058,10 @@ setMethod("countDistinct",
20552058

20562059
#' @details
20572060
#' \code{concat}: Concatenates multiple input columns together into a single column.
2058-
#' If all inputs are binary, concat returns an output as binary. Otherwise, it returns as string.
2061+
#' The function works with strings, binary and compatible array columns.
20592062
#'
2060-
#' @rdname column_string_functions
2063+
#' @rdname column_collection_functions
20612064
#' @aliases concat concat,Column-method
2062-
#' @examples
2063-
#'
2064-
#' \dontrun{
2065-
#' # concatenate strings
2066-
#' tmp <- mutate(df, s1 = concat(df$Class, df$Sex),
2067-
#' s2 = concat(df$Class, df$Sex, df$Age),
2068-
#' s3 = concat(df$Class, df$Sex, df$Age, df$Class),
2069-
#' s4 = concat_ws("_", df$Class, df$Sex),
2070-
#' s5 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
2071-
#' head(tmp)}
20722065
#' @note concat since 1.5.0
20732066
setMethod("concat",
20742067
signature(x = "Column"),
@@ -2409,6 +2402,13 @@ setMethod("shiftRightUnsigned", signature(y = "Column", x = "numeric"),
24092402
#' @param sep separator to use.
24102403
#' @rdname column_string_functions
24112404
#' @aliases concat_ws concat_ws,character,Column-method
2405+
#' @examples
2406+
#'
2407+
#' \dontrun{
2408+
#' # concatenate strings
2409+
#' tmp <- mutate(df, s1 = concat_ws("_", df$Class, df$Sex),
2410+
#' s2 = concat_ws("+", df$Class, df$Sex, df$Age, df$Survived))
2411+
#' head(tmp)}
24122412
#' @note concat_ws since 1.5.0
24132413
setMethod("concat_ws", signature(sep = "character", x = "Column"),
24142414
function(sep, x, ...) {
@@ -3063,7 +3063,8 @@ setMethod("array_sort",
30633063
})
30643064

30653065
#' @details
3066-
#' \code{flatten}: Transforms an array of arrays into a single array.
3066+
#' \code{flatten}: Creates a single array from an array of arrays.
3067+
#' If a structure of nested arrays is deeper than two levels, only one level of nesting is removed.
30673068
#'
30683069
#' @rdname column_collection_functions
30693070
#' @aliases flatten flatten,Column-method

R/pkg/R/generics.R

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -817,7 +817,7 @@ setGeneric("collect_set", function(x) { standardGeneric("collect_set") })
817817
#' @rdname column
818818
setGeneric("column", function(x) { standardGeneric("column") })
819819

820-
#' @rdname column_string_functions
820+
#' @rdname column_collection_functions
821821
#' @name NULL
822822
setGeneric("concat", function(x, ...) { standardGeneric("concat") })
823823

@@ -1134,7 +1134,7 @@ setGeneric("regexp_replace",
11341134
#' @name NULL
11351135
setGeneric("repeat_string", function(x, n) { standardGeneric("repeat_string") })
11361136

1137-
#' @rdname column_string_functions
1137+
#' @rdname column_collection_functions
11381138
#' @name NULL
11391139
setGeneric("reverse", function(x) { standardGeneric("reverse") })
11401140

R/pkg/tests/fulltests/test_sparkSQL.R

Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1479,7 +1479,7 @@ test_that("column functions", {
14791479
df5 <- createDataFrame(list(list(a = "010101")))
14801480
expect_equal(collect(select(df5, conv(df5$a, 2, 16)))[1, 1], "15")
14811481

1482-
# Test array_contains(), array_max(), array_min(), array_position() and element_at()
1482+
# Test array_contains(), array_max(), array_min(), array_position(), element_at() and reverse()
14831483
df <- createDataFrame(list(list(list(1L, 2L, 3L)), list(list(6L, 5L, 4L))))
14841484
result <- collect(select(df, array_contains(df[[1]], 1L)))[[1]]
14851485
expect_equal(result, c(TRUE, FALSE))
@@ -1496,6 +1496,13 @@ test_that("column functions", {
14961496
result <- collect(select(df, element_at(df[[1]], 1L)))[[1]]
14971497
expect_equal(result, c(1, 6))
14981498

1499+
result <- collect(select(df, reverse(df[[1]])))[[1]]
1500+
expect_equal(result, list(list(3L, 2L, 1L), list(4L, 5L, 6L)))
1501+
1502+
df2 <- createDataFrame(list(list("abc")))
1503+
result <- collect(select(df2, reverse(df2[[1]])))[[1]]
1504+
expect_equal(result, "cba")
1505+
14991506
# Test array_sort() and sort_array()
15001507
df <- createDataFrame(list(list(list(2L, 1L, 3L, NA)), list(list(NA, 6L, 5L, NA, 4L))))
15011508

@@ -1512,7 +1519,13 @@ test_that("column functions", {
15121519
result <- collect(select(df, slice(df[[1]], 2L, 2L)))[[1]]
15131520
expect_equal(result, list(list(2L, 3L), list(5L)))
15141521

1515-
# Test flattern
1522+
# Test concat()
1523+
df <- createDataFrame(list(list(list(1L, 2L, 3L), list(4L, 5L, 6L)),
1524+
list(list(7L, 8L, 9L), list(10L, 11L, 12L))))
1525+
result <- collect(select(df, concat(df[[1]], df[[2]])))[[1]]
1526+
expect_equal(result, list(list(1L, 2L, 3L, 4L, 5L, 6L), list(7L, 8L, 9L, 10L, 11L, 12L)))
1527+
1528+
# Test flatten()
15161529
df <- createDataFrame(list(list(list(list(1L, 2L), list(3L, 4L))),
15171530
list(list(list(5L, 6L), list(7L, 8L)))))
15181531
result <- collect(select(df, flatten(df[[1]])))[[1]]

0 commit comments

Comments
 (0)