1
- # ' @section Stats:
1
+ # ' Stats
2
2
# '
3
+ # ' @description
3
4
# ' All `stat_*()` functions (like `stat_bin()`) return a layer that
4
5
# ' contains a `Stat*` object (like `StatBin`). The `Stat*`
5
6
# ' object is responsible for computing the statistics that are displayed in the plot.
6
7
# '
8
+ # ' @details
7
9
# ' Each of the `Stat*` objects is a [ggproto()] object, descended
8
10
# ' from the top-level `Stat`, and each implements various methods and
9
- # ' fields. To create a new type of Stat object, you typically will want to
11
+ # ' fields.
12
+ # '
13
+ # ' To create a new type of Stat object, you typically will want to
10
14
# ' override one or more of the following:
11
15
# '
12
- # ' - One of :
13
- # ' `compute_layer(self, data, scales, ...)`,
14
- # ' `compute_panel(self, data, scales, ...)`, or
15
- # ' `compute_group(self, data, scales, ...)`.
16
+ # ' * The `required_aes` and `default_aes` fields.
17
+ # ' * One of the `compute_layer()`, `compute_panel()` or `compute_group()`
18
+ # ' functions. Typically it is best to implement `compute_group()` and use the
19
+ # ' higher-up methods when there are substantial performance improvements to
20
+ # ' be gained.
21
+ # ' * The `finish_layer()` method.
16
22
# '
17
- # ' `compute_layer()` is called once per layer, `compute_panel()`
18
- # ' is called once per panel, and `compute_group()` is called once per
19
- # ' group. All must return a data frame.
23
+ # ' @section Conventions:
20
24
# '
21
- # ' It's usually best to start by overriding `compute_group`: if
22
- # ' you find substantial performance optimisations, override higher up.
23
- # ' You'll need to read the source code of the default methods to see
24
- # ' what else you should be doing.
25
+ # ' The object name that a new class is assigned to is typically the same as the
26
+ # ' class name. Stat class names are in UpperCamelCase and start with the `Stat*`
27
+ # ' prefix, like `StatNew`.
25
28
# '
26
- # ' `data` is a data frame containing the variables named according
27
- # ' to the aesthetics that they're mapped to. `scales` is a list
28
- # ' containing the `x` and `y` scales. There functions are called
29
- # ' before the facets are trained, so they are global scales, not local
30
- # ' to the individual panels.`...` contains the parameters returned by
31
- # ' `setup_params()`.
32
- # ' - `finish_layer(data, params)`: called once for each layer. Used
33
- # ' to modify the data after scales has been applied, but before the data is
34
- # ' handed of to the geom for rendering. The default is to not modify the
35
- # ' data. Use this hook if the stat needs access to the actual aesthetic
36
- # ' values rather than the values that are mapped to the aesthetic.
37
- # ' - `setup_params(data, params)`: called once for each layer.
38
- # ' Used to setup defaults that need to complete dataset, and to inform
39
- # ' the user of important choices. Should return list of parameters.
40
- # ' - `setup_data(data, params)`: called once for each layer,
41
- # ' after `setup_params()`. Should return modified `data`.
42
- # ' Default methods removes all rows containing a missing value in
43
- # ' required aesthetics (with a warning if `!na.rm`).
44
- # ' - `required_aes`: A character vector of aesthetics needed to
45
- # ' render the geom.
46
- # ' - `default_aes`: A list (generated by [aes()] of
47
- # ' default values for aesthetics.
48
- # ' - `dropped_aes` is a vecor of aesthetic names that are safe to drop after
49
- # ' statistical transformation. A classic example is the `weight` aesthetic
50
- # ' that is consumed during computation of the stat.
29
+ # ' A constructor function is usually paired with a Stat class. The constructor
30
+ # ' wraps a call to `layer()`, for example `layer(stat = StatNew)`. The
31
+ # ' constructor function name is formatted by taking the Stat class name and
32
+ # ' formatting it with snake_case, so that `StatNew` becomes `stat_new()`.
51
33
# '
52
- # ' See also the `r link_book("new stats section", "extensions#sec-new-stats")`
53
- # ' @rdname ggplot2-ggproto
54
- # ' @format NULL
55
- # ' @usage NULL
56
34
# ' @export
57
- Stat <- ggproto(" Stat" ,
58
- # Should the values produced by the statistic also be transformed
59
- # in the second pass when recently added statistics are trained to
60
- # the scales
61
- retransform = TRUE ,
35
+ # ' @format NULL
36
+ # ' @usage
37
+ # ' # Creating a new subclass
38
+ # ' StatNew <- ggproto("StatNew", Stat, ...)
39
+ # '
40
+ # ' # Usage in the `layer()` function
41
+ # ' layer(stat = StatNew)
42
+ # ' @seealso The `r link_book("new stats section", "extensions#sec-new-stats")`.
43
+ # ' @seealso Run `vignette("extending-ggplot2")`, in particular the "Creating a
44
+ # ' new stat" section.
45
+ # ' @examples
46
+ # ' # Extending the class
47
+ # ' StatKmeans <- ggproto(
48
+ # ' "StatKmeans", Stat,
49
+ # ' # Fields
50
+ # ' required_aes = c("x", "y"),
51
+ # ' # You can relate computed variables to aesthetics using `after_stat()`
52
+ # ' # in defaults
53
+ # ' default_aes = aes(colour = after_stat(cluster)),
54
+ # ' # Methods
55
+ # ' compute_panel = function(data, scales, k = 2L) {
56
+ # ' km <- kmeans(cbind(scale(data$x), scale(data$y)), centers = k)
57
+ # ' data$cluster <- factor(km$cluster)
58
+ # ' data
59
+ # ' }
60
+ # ' )
61
+ # '
62
+ # ' # Building a constructor
63
+ # ' stat_kmeans <- function(mapping = NULL, data = NULL, geom = "point",
64
+ # ' position = "identity", ..., k = 2L, na.rm = FALSE,
65
+ # ' show.legend = NA, inherit.aes = TRUE) {
66
+ # ' layer(
67
+ # ' mapping = mapping, data = data,
68
+ # ' geom = geom, stat = StatKmeans, position = position,
69
+ # ' show.legend = show.legend, inherit.aes = inherit.aes,
70
+ # ' params = list(na.rm = na.rm, k = k, ...)
71
+ # ' )
72
+ # ' }
73
+ # '
74
+ # ' # Use new stat in plot
75
+ # ' ggplot(mpg, aes(displ, hwy)) +
76
+ # ' stat_kmeans(k = 3)
77
+ Stat <- ggproto(
78
+ " Stat" ,
62
79
63
- default_aes = aes(),
80
+ # Fields ------------------------------------------------------------------
64
81
82
+ # ' @field required_aes A character vector naming aesthetics that are necessary
83
+ # ' to compute the stat.
65
84
required_aes = character (),
66
85
86
+ # ' @field non_missing_aes A character vector naming aesthetics that will cause
87
+ # ' removal if they have missing values.
67
88
non_missing_aes = character (),
68
89
69
- # Any aesthetics that are dropped from the data frame during the
70
- # statistical transformation should be listed here to suppress a
71
- # warning about dropped aesthetics
90
+ # ' @field optional_aes A character vector naming aesthetics that will be
91
+ # ' accepted by `layer()`, but are not required or described in the `default_aes`
92
+ # ' field.
93
+ optional_aes = character (),
94
+
95
+ # ' @field default_aes A [mapping][aes()] of default values for aesthetics.
96
+ # ' Aesthetics can be set to `NULL` to be included as an optional aesthetic.
97
+ default_aes = aes(),
98
+
99
+ # ' @field dropped_aes A character vector naming aesthetics that can be dropped
100
+ # ' from the data without warning. Typically used for aesthetics that are
101
+ # ' 'consumed' during computation like `"weight"`.
72
102
dropped_aes = character (),
73
103
74
- optional_aes = character (),
104
+ # ' @field extra_params A character vector of parameter names in addition to
105
+ # ' those inferred from the `compute_panel()` or `compute_group()` methods.
106
+ # ' This field can be set to include parameters for `setup_data()` methods.
107
+ # ' By default, this only contains `"na.rm"`.
108
+ extra_params = " na.rm" ,
109
+
110
+ # ' @field retransform A scalar boolean: should the values produced by the
111
+ # ' statistic also be transformed in the second pass when recently added
112
+ # ' statistics are trained to the scales.
113
+ retransform = TRUE ,
75
114
115
+ # Methods -----------------------------------------------------------------
116
+
117
+ # # compute_statistic ------------------------------------------------------
118
+
119
+ # ' @field setup_params
120
+ # ' **Description**
121
+ # '
122
+ # ' A function method for modifying or checking the parameters based on the
123
+ # ' data. The default method returns the parameters unaltered.
124
+ # '
125
+ # ' **Usage**
126
+ # ' ```r
127
+ # ' Stat$setup_params(data, params)
128
+ # ' ```
129
+ # ' **Arguments**
130
+ # ' \describe{
131
+ # ' \item{`data`}{A data frame with the layer's data.}
132
+ # ' \item{`params`}{A list of current parameters}
133
+ # ' }
134
+ # '
135
+ # ' **Value**
136
+ # '
137
+ # ' A list of parameters
76
138
setup_params = function (data , params ) {
77
139
params
78
140
},
79
141
142
+ # ' @field setup_data
143
+ # ' **Description**
144
+ # '
145
+ # ' A function method for modifying or checking the data. The default method
146
+ # ' returns data unaltered.
147
+ # '
148
+ # ' **Usage**
149
+ # ' ```r
150
+ # ' Stat$setup_data(data, params)
151
+ # ' ```
152
+ # ' **Arguments**
153
+ # ' \describe{
154
+ # ' \item{`data`}{A data frame with the layer's data.}
155
+ # ' \item{`params`}{A list of parameters coming from the `setup_params()`
156
+ # ' method}
157
+ # ' }
158
+ # '
159
+ # ' **Value**
160
+ # '
161
+ # ' A data frame with layer data
80
162
setup_data = function (data , params ) {
81
163
data
82
164
},
83
165
166
+ # ' @field compute_layer
167
+ # ' **Description**
168
+ # '
169
+ # ' A function method for orchestrating the computation of the statistic. The
170
+ # ' default method splits the data and passes on computation tasks to the
171
+ # ' panel-level `compute_panel()` method. In addition, the default method
172
+ # ' handles missing values by removing rows that have missing values for the
173
+ # ' aesthetics listed in the `required_aes` and `non_missing_aes` fields. It is
174
+ # ' not recommended to use this method as an extension point.
175
+ # '
176
+ # ' **Usage**
177
+ # ' ```r
178
+ # ' Stat$compute_layer(data, params, layout)
179
+ # ' ```
180
+ # '
181
+ # ' **Arguments**
182
+ # ' \describe{
183
+ # ' \item{`data`}{A data frame with the layer's data.}
184
+ # ' \item{`params`}{A list of parameters}
185
+ # ' \item{`layout`}{A pre-trained `<Layout>` ggproto object.}
186
+ # ' }
187
+ # '
188
+ # ' **Value**
189
+ # '
190
+ # ' A data frame with computed data
84
191
compute_layer = function (self , data , params , layout ) {
85
192
check_required_aesthetics(
86
193
self $ required_aes ,
87
194
c(names(data ), names(params )),
88
195
snake_class(self )
89
196
)
90
197
198
+ # TODO: for symmetry with Geom, should Stat have separate `handle_na()` method?
91
199
# Make sure required_aes consists of the used set of aesthetics in case of
92
200
# "|" notation in self$required_aes
93
201
required_aes <- intersect(
@@ -117,6 +225,34 @@ Stat <- ggproto("Stat",
117
225
})
118
226
},
119
227
228
+ # ' @field compute_panel,compute_group
229
+ # ' **Description**
230
+ # '
231
+ # ' A function method orchestrating the computation of statistics for a single
232
+ # ' panel or group. The default `compute_panel()` method splits the data into
233
+ # ' groups, and passes on computation tasks to the `compute_group()` method.
234
+ # ' In addition, `compute_panel()` is tasked with preserving aesthetics that
235
+ # ' are constant within a group and restoring these if the computation loses
236
+ # ' them. The default `compute_group()` is not implemented.
237
+ # '
238
+ # ' **Usage**
239
+ # ' ```r
240
+ # ' Stat$compute_panel(data, scales, ...)
241
+ # ' Stat$compute_group(data, scales, ...)
242
+ # ' ```
243
+ # ' **Arguments**
244
+ # ' \describe{
245
+ # ' \item{`data`}{A data frame with the layer's data.}
246
+ # ' \item{`scales`}{A list containing pre-trained `x` and `y` scales.
247
+ # ' Note that these are global scales trained on all data, not panel-specific
248
+ # ' scales.}
249
+ # ' \item{`...`}{Reserved for extensions. By default, this passes parameters
250
+ # ' to the `compute_group()` method.}
251
+ # ' }
252
+ # '
253
+ # ' **Value**
254
+ # '
255
+ # ' A data frame with layer data
120
256
compute_panel = function (self , data , scales , ... ) {
121
257
if (empty(data )) return (data_frame0())
122
258
@@ -182,17 +318,54 @@ Stat <- ggproto("Stat",
182
318
data_new [, ! names(data_new ) %in% non_constant_columns , drop = FALSE ]
183
319
},
184
320
185
- compute_group = function (self , data , scales ) {
186
- cli :: cli_abort(" Not implemented." )
187
- },
188
-
321
+ compute_group = not_implemented(" compute_group" ),
322
+
323
+ # finish_statistics -------------------------------------------------------
324
+
325
+ # ' @field finish_layer
326
+ # ' **Description**
327
+ # '
328
+ # ' A function method acting as a hook to modify data after scales have been
329
+ # ' applied, but before geoms have to render. The default is to pass the data
330
+ # ' unaltered. This can be used as an extension point when actual aesthetic
331
+ # ' values rather than values mapped to the aesthetic are needed.
332
+ # '
333
+ # ' **Usage**
334
+ # ' ```r
335
+ # ' Stat$finish_layer(data, params)
336
+ # ' ```
337
+ # ' **Arguments**
338
+ # ' \describe{
339
+ # ' \item{`data`}{A data frame with layer data}
340
+ # ' \item{`params`}{A list of parameters}
341
+ # ' }
342
+ # '
343
+ # ' **Value**
344
+ # '
345
+ # ' A data frame with layer data
189
346
finish_layer = function (self , data , params ) {
190
347
data
191
348
},
192
349
193
-
194
- # See discussion at Geom$parameters()
195
- extra_params = " na.rm" ,
350
+ # # Utilities ---------------------------------------------------------------
351
+
352
+ # ' @field parameters
353
+ # ' **Description**
354
+ # '
355
+ # ' A function method for listing out all acceptable parameters for this stat.
356
+ # '
357
+ # ' **Usage**
358
+ # ' ```r
359
+ # ' Stat$parameters(extra)
360
+ # ' ```
361
+ # ' **Arguments**
362
+ # ' \describe{
363
+ # ' \item{`extra`}{A boolean: whether to include the `extra_params` field.}
364
+ # ' }
365
+ # '
366
+ # ' **Value**
367
+ # '
368
+ # ' A character vector of parameter names.
196
369
parameters = function (self , extra = FALSE ) {
197
370
# Look first in compute_panel. If it contains ... then look in compute_group
198
371
panel_args <- names(ggproto_formals(self $ compute_panel ))
@@ -208,6 +381,18 @@ Stat <- ggproto("Stat",
208
381
args
209
382
},
210
383
384
+ # ' @field aesthetics
385
+ # ' **Description**
386
+ # '
387
+ # ' A function method for listing out all acceptable aesthetics for this stat.
388
+ # '
389
+ # ' **Usage**
390
+ # ' ```r
391
+ # ' Stat$aesthetics()
392
+ # ' ```
393
+ # ' **Value**
394
+ # '
395
+ # ' A character vector of aesthetic names.
211
396
aesthetics = function (self ) {
212
397
if (is.null(self $ required_aes )) {
213
398
required_aes <- NULL
0 commit comments