Skip to content

Commit 5298171

Browse files
actuaryzhang authored and Felix Cheung committed
[SPARK-20889][SPARKR] Grouped documentation for COLLECTION column methods
## What changes were proposed in this pull request? Grouped documentation for column collection methods. Author: actuaryzhang <[email protected]> Author: Wayne Zhang <[email protected]> Closes apache#18458 from actuaryzhang/sparkRDocCollection.
1 parent fddb63f commit 5298171

File tree

2 files changed

+108
-123
lines changed

2 files changed

+108
-123
lines changed

R/pkg/R/functions.R

Lines changed: 90 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,35 @@ NULL
171171
#' }
172172
NULL
173173

174+
#' Collection functions for Column operations
175+
#'
176+
#' Collection functions defined for \code{Column}.
177+
#'
178+
#' @param x Column to compute on. Note the difference in the following methods:
179+
#' \itemize{
180+
#' \item \code{to_json}: it is the column containing the struct or array of the structs.
181+
#' \item \code{from_json}: it is the column containing the JSON string.
182+
#' }
183+
#' @param ... additional argument(s). In \code{to_json} and \code{from_json}, these are
184+
#' additional named properties that control the conversion; the same options as
185+
#' the JSON data source are accepted.
186+
#' @name column_collection_functions
187+
#' @rdname column_collection_functions
188+
#' @family collection functions
189+
#' @examples
190+
#' \dontrun{
191+
#' # Dataframe used throughout this doc
192+
#' df <- createDataFrame(cbind(model = rownames(mtcars), mtcars))
194+
#' tmp <- mutate(df, v1 = create_array(df$mpg, df$cyl, df$hp))
195+
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1)))
196+
#' tmp2 <- mutate(tmp, v2 = explode(tmp$v1))
197+
#' head(tmp2)
198+
#' head(select(tmp, posexplode(tmp$v1)))
199+
#' head(select(tmp, sort_array(tmp$v1)))
200+
#' head(select(tmp, sort_array(tmp$v1, asc = FALSE)))}
201+
NULL
202+
174203
#' @details
175204
#' \code{lit}: A new Column is created to represent the literal value.
176205
#' If the parameter is a Column, it is returned unchanged.
@@ -1642,30 +1671,23 @@ setMethod("to_date",
16421671
column(jc)
16431672
})
16441673

1645-
#' to_json
1646-
#'
1647-
#' Converts a column containing a \code{structType} or array of \code{structType} into a Column
1648-
#' of JSON string. Resolving the Column can fail if an unsupported type is encountered.
1649-
#'
1650-
#' @param x Column containing the struct or array of the structs
1651-
#' @param ... additional named properties to control how it is converted, accepts the same options
1652-
#' as the JSON data source.
1674+
#' @details
1675+
#' \code{to_json}: Converts a column containing a \code{structType} or array of \code{structType}
1676+
#' into a Column of JSON string. Resolving the Column can fail if an unsupported type is encountered.
16531677
#'
1654-
#' @family non-aggregate functions
1655-
#' @rdname to_json
1656-
#' @name to_json
1657-
#' @aliases to_json,Column-method
1678+
#' @rdname column_collection_functions
1679+
#' @aliases to_json to_json,Column-method
16581680
#' @export
16591681
#' @examples
1682+
#'
16601683
#' \dontrun{
16611684
#' # Converts a struct into a JSON object
1662-
#' df <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
1663-
#' select(df, to_json(df$d, dateFormat = 'dd/MM/yyyy'))
1685+
#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
1686+
#' select(df2, to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
16641687
#'
16651688
#' # Converts an array of structs into a JSON array
1666-
#' df <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
1667-
#' select(df, to_json(df$people))
1668-
#'}
1689+
#' df2 <- sql("SELECT array(named_struct('name', 'Bob'), named_struct('name', 'Alice')) as people")
1690+
#' df2 <- mutate(df2, people_json = to_json(df2$people))}
16691691
#' @note to_json since 2.2.0
16701692
setMethod("to_json", signature(x = "Column"),
16711693
function(x, ...) {
@@ -2120,28 +2142,28 @@ setMethod("date_format", signature(y = "Column", x = "character"),
21202142
column(jc)
21212143
})
21222144

2123-
#' from_json
2124-
#'
2125-
#' Parses a column containing a JSON string into a Column of \code{structType} with the specified
2126-
#' \code{schema} or array of \code{structType} if \code{as.json.array} is set to \code{TRUE}.
2127-
#' If the string is unparseable, the Column will contains the value NA.
2145+
#' @details
2146+
#' \code{from_json}: Parses a column containing a JSON string into a Column of \code{structType}
2147+
#' with the specified \code{schema} or array of \code{structType} if \code{as.json.array} is set
2148+
#' to \code{TRUE}. If the string is unparseable, the Column will contain the value NA.
21282149
#'
2129-
#' @param x Column containing the JSON string.
2150+
#' @rdname column_collection_functions
21302151
#' @param schema a structType object to use as the schema when parsing the JSON string.
21312152
#' @param as.json.array a logical flag indicating whether the input string is a JSON array of objects or a single object.
2132-
#' @param ... additional named properties to control how the json is parsed, accepts the same
2133-
#' options as the JSON data source.
2134-
#'
2135-
#' @family non-aggregate functions
2136-
#' @rdname from_json
2137-
#' @name from_json
2138-
#' @aliases from_json,Column,structType-method
2153+
#' @aliases from_json from_json,Column,structType-method
21392154
#' @export
21402155
#' @examples
2156+
#'
21412157
#' \dontrun{
2142-
#' schema <- structType(structField("name", "string"),
2143-
#' select(df, from_json(df$value, schema, dateFormat = "dd/MM/yyyy"))
2144-
#'}
2158+
#' df2 <- sql("SELECT named_struct('date', cast('2000-01-01' as date)) as d")
2159+
#' df2 <- mutate(df2, d2 = to_json(df2$d, dateFormat = 'dd/MM/yyyy'))
2160+
#' schema <- structType(structField("date", "string"))
2161+
#' head(select(df2, from_json(df2$d2, schema, dateFormat = 'dd/MM/yyyy')))
2162+
2163+
#' df2 <- sql("SELECT named_struct('name', 'Bob') as people")
2164+
#' df2 <- mutate(df2, people_json = to_json(df2$people))
2165+
#' schema <- structType(structField("name", "string"))
2166+
#' head(select(df2, from_json(df2$people_json, schema)))}
21452167
#' @note from_json since 2.2.0
21462168
setMethod("from_json", signature(x = "Column", schema = "structType"),
21472169
function(x, schema, as.json.array = FALSE, ...) {
@@ -3101,18 +3123,14 @@ setMethod("row_number",
31013123

31023124
###################### Collection functions ######################
31033125

3104-
#' array_contains
3105-
#'
3106-
#' Returns null if the array is null, true if the array contains the value, and false otherwise.
3126+
#' @details
3127+
#' \code{array_contains}: Returns null if the array is null, true if the array contains
3128+
#' the value, and false otherwise.
31073129
#'
3108-
#' @param x A Column
31093130
#' @param value A value to look for in the array column.
3110-
#' @rdname array_contains
3111-
#' @aliases array_contains,Column-method
3112-
#' @name array_contains
3113-
#' @family collection functions
3131+
#' @rdname column_collection_functions
3132+
#' @aliases array_contains array_contains,Column-method
31143133
#' @export
3115-
#' @examples \dontrun{array_contains(df$c, 1)}
31163134
#' @note array_contains since 1.6.0
31173135
setMethod("array_contains",
31183136
signature(x = "Column", value = "ANY"),
@@ -3121,18 +3139,12 @@ setMethod("array_contains",
31213139
column(jc)
31223140
})
31233141

3124-
#' explode
3125-
#'
3126-
#' Creates a new row for each element in the given array or map column.
3127-
#'
3128-
#' @param x Column to compute on
3142+
#' @details
3143+
#' \code{explode}: Creates a new row for each element in the given array or map column.
31293144
#'
3130-
#' @rdname explode
3131-
#' @name explode
3132-
#' @family collection functions
3133-
#' @aliases explode,Column-method
3145+
#' @rdname column_collection_functions
3146+
#' @aliases explode explode,Column-method
31343147
#' @export
3135-
#' @examples \dontrun{explode(df$c)}
31363148
#' @note explode since 1.5.0
31373149
setMethod("explode",
31383150
signature(x = "Column"),
@@ -3141,18 +3153,12 @@ setMethod("explode",
31413153
column(jc)
31423154
})
31433155

3144-
#' size
3145-
#'
3146-
#' Returns length of array or map.
3147-
#'
3148-
#' @param x Column to compute on
3156+
#' @details
3157+
#' \code{size}: Returns length of array or map.
31493158
#'
3150-
#' @rdname size
3151-
#' @name size
3152-
#' @aliases size,Column-method
3153-
#' @family collection functions
3159+
#' @rdname column_collection_functions
3160+
#' @aliases size size,Column-method
31543161
#' @export
3155-
#' @examples \dontrun{size(df$c)}
31563162
#' @note size since 1.5.0
31573163
setMethod("size",
31583164
signature(x = "Column"),
@@ -3161,25 +3167,16 @@ setMethod("size",
31613167
column(jc)
31623168
})
31633169

3164-
#' sort_array
3165-
#'
3166-
#' Sorts the input array in ascending or descending order according
3170+
#' @details
3171+
#' \code{sort_array}: Sorts the input array in ascending or descending order according
31673172
#' to the natural ordering of the array elements.
31683173
#'
3169-
#' @param x A Column to sort
3174+
#' @rdname column_collection_functions
31703175
#' @param asc A logical flag indicating the sorting order.
31713176
#' TRUE, sorting is in ascending order.
31723177
#' FALSE, sorting is in descending order.
3173-
#' @rdname sort_array
3174-
#' @name sort_array
3175-
#' @aliases sort_array,Column-method
3176-
#' @family collection functions
3178+
#' @aliases sort_array sort_array,Column-method
31773179
#' @export
3178-
#' @examples
3179-
#' \dontrun{
3180-
#' sort_array(df$c)
3181-
#' sort_array(df$c, FALSE)
3182-
#' }
31833180
#' @note sort_array since 1.6.0
31843181
setMethod("sort_array",
31853182
signature(x = "Column"),
@@ -3188,18 +3185,13 @@ setMethod("sort_array",
31883185
column(jc)
31893186
})
31903187

3191-
#' posexplode
3192-
#'
3193-
#' Creates a new row for each element with position in the given array or map column.
3194-
#'
3195-
#' @param x Column to compute on
3188+
#' @details
3189+
#' \code{posexplode}: Creates a new row for each element with position in the given array
3190+
#' or map column.
31963191
#'
3197-
#' @rdname posexplode
3198-
#' @name posexplode
3199-
#' @family collection functions
3200-
#' @aliases posexplode,Column-method
3192+
#' @rdname column_collection_functions
3193+
#' @aliases posexplode posexplode,Column-method
32013194
#' @export
3202-
#' @examples \dontrun{posexplode(df$c)}
32033195
#' @note posexplode since 2.1.0
32043196
setMethod("posexplode",
32053197
signature(x = "Column"),
@@ -3325,27 +3317,24 @@ setMethod("repeat_string",
33253317
column(jc)
33263318
})
33273319

3328-
#' explode_outer
3329-
#'
3330-
#' Creates a new row for each element in the given array or map column.
3320+
#' @details
3321+
#' \code{explode_outer}: Creates a new row for each element in the given array or map column.
33313322
#' Unlike \code{explode}, if the array/map is \code{null} or empty
33323323
#' then \code{null} is produced.
33333324
#'
3334-
#' @param x Column to compute on
33353325
#'
3336-
#' @rdname explode_outer
3337-
#' @name explode_outer
3338-
#' @family collection functions
3339-
#' @aliases explode_outer,Column-method
3326+
#' @rdname column_collection_functions
3327+
#' @aliases explode_outer explode_outer,Column-method
33403328
#' @export
33413329
#' @examples
3330+
#'
33423331
#' \dontrun{
3343-
#' df <- createDataFrame(data.frame(
3332+
#' df2 <- createDataFrame(data.frame(
33443333
#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
33453334
#' ))
33463335
#'
3347-
#' head(select(df, df$id, explode_outer(split_string(df$text, ","))))
3348-
#' }
3336+
#' head(select(df2, df2$id, explode_outer(split_string(df2$text, ","))))
3337+
#' head(select(df2, df2$id, posexplode_outer(split_string(df2$text, ","))))}
33493338
#' @note explode_outer since 2.3.0
33503339
setMethod("explode_outer",
33513340
signature(x = "Column"),
@@ -3354,27 +3343,14 @@ setMethod("explode_outer",
33543343
column(jc)
33553344
})
33563345

3357-
#' posexplode_outer
3358-
#'
3359-
#' Creates a new row for each element with position in the given array or map column.
3360-
#' Unlike \code{posexplode}, if the array/map is \code{null} or empty
3346+
#' @details
3347+
#' \code{posexplode_outer}: Creates a new row for each element with position in the given
3348+
#' array or map column. Unlike \code{posexplode}, if the array/map is \code{null} or empty
33613349
#' then the row (\code{null}, \code{null}) is produced.
33623350
#'
3363-
#' @param x Column to compute on
3364-
#'
3365-
#' @rdname posexplode_outer
3366-
#' @name posexplode_outer
3367-
#' @family collection functions
3368-
#' @aliases posexplode_outer,Column-method
3351+
#' @rdname column_collection_functions
3352+
#' @aliases posexplode_outer posexplode_outer,Column-method
33693353
#' @export
3370-
#' @examples
3371-
#' \dontrun{
3372-
#' df <- createDataFrame(data.frame(
3373-
#' id = c(1, 2, 3), text = c("a,b,c", NA, "d,e")
3374-
#' ))
3375-
#'
3376-
#' head(select(df, df$id, posexplode_outer(split_string(df$text, ","))))
3377-
#' }
33783354
#' @note posexplode_outer since 2.3.0
33793355
setMethod("posexplode_outer",
33803356
signature(x = "Column"),

0 commit comments

Comments
 (0)