|
| 1 | +# minimal implementation of ggplot2 mosaic after ggmosaic was archived in CRAN |
| 2 | +# |
| 3 | +# This was heavily inspired by github.com/haleyjeppson/ggmosaic package but |
| 4 | +# simplified to only support 2 categorical variables |
| 5 | + |
| 6 | +#' Mosaic Rectangles Layer for ggplot2 |
| 7 | +#' |
| 8 | +#' Adds a mosaic-style rectangles layer to a ggplot, visualizing the |
| 9 | +#' joint distribution of categorical variables. |
| 10 | +#' Each rectangle's size reflects the proportion of observations for |
| 11 | +#' combinations of `x` and `fill`. |
| 12 | +#' |
| 13 | +#' @param mapping Set of aesthetic mappings created by `aes()`. Must specify `x` and `fill`. |
| 14 | +#' @param data The data to be displayed in this layer. |
| 15 | +#' @param stat The statistical transformation to use on the data. Defaults to `"rects"`. |
| 16 | +#' @param position Position adjustment. Defaults to `"identity"`. |
| 17 | +#' @param ... Other arguments passed to `layer()`. |
| 18 | +#' @param na.rm Logical. Should missing values be removed? |
| 19 | +#' @param show.legend Logical. Should this layer be included in the legends? |
| 20 | +#' @param inherit.aes Logical. If `FALSE`, overrides default aesthetics. |
| 21 | +#' |
| 22 | +#' @return A ggplot2 layer that adds mosaic rectangles to the plot. |
| 23 | +#' |
| 24 | +#' @examples |
| 25 | +#' df <- data.frame(RACE = c("Black", "White", "Black", "Asian"), SEX = c("M", "M", "F", "F")) |
| 26 | +#' library(ggplot2) |
| 27 | +#' ggplot(df) + |
| 28 | +#' geom_mosaic(aes(x = RACE, fill = SEX)) |
| 29 | +#' @export |
| 30 | +geom_mosaic <- function(mapping = NULL, data = NULL, |
| 31 | + stat = "mosaic", position = "identity", |
| 32 | + ..., |
| 33 | + na.rm = FALSE, # nolint: object_name_linter. |
| 34 | + show.legend = TRUE, # nolint: object_name_linter. |
| 35 | + inherit.aes = TRUE) { # nolint: object_name_linter. |
| 36 | + |
| 37 | + aes_x <- mapping$x |
| 38 | + if (!is.null(aes_x)) { |
| 39 | + aes_x <- list(rlang::quo_get_expr(mapping$x)) |
| 40 | + var_x <- paste0("x__", as.character(aes_x)) |
| 41 | + mapping[[var_x]] <- mapping$x |
| 42 | + } |
| 43 | + |
| 44 | + aes_fill <- mapping$fill |
| 45 | + if (!is.null(aes_fill)) { |
| 46 | + aes_fill <- rlang::quo_text(mapping$fill) |
| 47 | + } |
| 48 | + |
| 49 | + mapping$x <- structure(1L) |
| 50 | + |
| 51 | + layer <- ggplot2::layer( |
| 52 | + geom = GeomMosaic, |
| 53 | + stat = "mosaic", |
| 54 | + data = data, |
| 55 | + mapping = mapping, |
| 56 | + position = position, |
| 57 | + show.legend = show.legend, |
| 58 | + inherit.aes = inherit.aes, |
| 59 | + check.aes = FALSE, |
| 60 | + params = list(na.rm = na.rm, ...) |
| 61 | + ) |
| 62 | + list(layer, .scale_x_mosaic()) |
| 63 | +} |
| 64 | + |
| 65 | +#' @keywords internal |
| 66 | +GeomMosaic <- ggplot2::ggproto( # nolint: object_name_linter. |
| 67 | + "GeomMosaic", ggplot2::GeomRect, |
| 68 | + default_aes = ggplot2::aes( |
| 69 | + colour = NA, linewidth = 0.5, linetype = 1, alpha = 1, fill = "grey30" |
| 70 | + ), |
| 71 | + draw_panel = function(data, panel_params, coord) { |
| 72 | + if (all(is.na(data$colour))) data$colour <- scales::alpha(data$fill, data$alpha) |
| 73 | + ggplot2::GeomRect$draw_panel(data, panel_params, coord) |
| 74 | + }, |
| 75 | + required_aes = c("xmin", "xmax", "ymin", "ymax") |
| 76 | +) |
| 77 | + |
| 78 | +#' @keywords internal |
| 79 | +StatMosaic <- ggplot2::ggproto( # nolint: object_name_linter. |
| 80 | + "StatMosaic", ggplot2::Stat, |
| 81 | + required_aes = c("x", "fill"), |
| 82 | + compute_group = function(data, scales) data, |
| 83 | + compute_panel = function(data, scales) { |
| 84 | + data$x <- data[, grepl("x__", colnames(data))] |
| 85 | + result <- .calculate_coordinates(data) |
| 86 | + |
| 87 | + results_non_zero <- result[result$.n != 0, ] |
| 88 | + breaks <- unique(with(results_non_zero, (xmin + xmax) / 2)) |
| 89 | + labels <- unique(results_non_zero$x) |
| 90 | + result$x <- list(list2env(list(breaks = breaks[breaks != 0], labels = labels[breaks != 0]))) |
| 91 | + |
| 92 | + result$group <- 1 |
| 93 | + result$PANEL <- unique(data$PANEL) |
| 94 | + result |
| 95 | + } |
| 96 | +) |
| 97 | + |
| 98 | +#' Determining scales for mosaics |
| 99 | +#' |
| 100 | +#' @param breaks,labels,minor_breaks One of: |
| 101 | +#' - `NULL` for no breaks / labels. |
| 102 | +#' - [ggplot2::waiver()] for the default breaks / labels computed by the scale. |
| 103 | +#' - A numeric / character vector giving the positions of the breaks / labels. |
| 104 | +#' - A function. |
| 105 | +#' See [ggplot2::scale_x_continuous()] for more details. |
| 106 | +#' @param na.value The value to be used for `NA` values. |
| 107 | +#' @param position For position scales, The position of the axis. |
| 108 | +#' left or right for y axes, top or bottom for x axes. |
| 109 | +#' @param ... other arguments passed to `continuous_scale()`. |
| 110 | +#' @keywords internal |
| 111 | +.scale_x_mosaic <- function(breaks = unique, |
| 112 | + minor_breaks = NULL, |
| 113 | + labels = unique, |
| 114 | + na.value = NA_real_, # nolint: object_name_linter. |
| 115 | + position = "bottom", |
| 116 | + ...) { |
| 117 | + ggplot2::continuous_scale( |
| 118 | + aesthetics = c( |
| 119 | + "x", "xmin", "xmax", "xend", "xintercept", "xmin_final", "xmax_final", |
| 120 | + "xlower", "xmiddle", "xupper" |
| 121 | + ), |
| 122 | + palette = identity, |
| 123 | + breaks = breaks, |
| 124 | + minor_breaks = minor_breaks, |
| 125 | + labels = labels, |
| 126 | + na.value = na.value, |
| 127 | + position = position, |
| 128 | + super = ScaleContinuousMosaic, , |
| 129 | + guide = ggplot2::waiver(), |
| 130 | + ... |
| 131 | + ) |
| 132 | +} |
| 133 | + |
| 134 | +#' @keywords internal |
| 135 | +ScaleContinuousMosaic <- ggplot2::ggproto( # nolint: object_name_linter. |
| 136 | + "ScaleContinuousMosaic", ggplot2::ScaleContinuousPosition, |
| 137 | + train = function(self, x) { |
| 138 | + if (length(x) == 0) { |
| 139 | + return() |
| 140 | + } |
| 141 | + if (is.list(x)) { |
| 142 | + scale_x <- x[[1]] |
| 143 | + # re-assign the scale values now that we have the information - but only if necessary |
| 144 | + if (is.function(self$breaks)) self$breaks <- scale_x$breaks |
| 145 | + if (is.function(self$labels)) self$labels <- as.vector(scale_x$labels) |
| 146 | + return(NULL) |
| 147 | + } |
| 148 | + if (is_discrete(x)) { |
| 149 | + self$range$train(x = c(0, 1)) |
| 150 | + return(NULL) |
| 151 | + } |
| 152 | + self$range$train(x, call = self$call) |
| 153 | + }, |
| 154 | + map = function(self, x, limits = self$get_limits()) { |
| 155 | + if (is_discrete(x)) { |
| 156 | + return(x) |
| 157 | + } |
| 158 | + if (is.list(x)) { |
| 159 | + return(0) |
| 160 | + } # need a number |
| 161 | + scaled <- as.numeric(self$oob(x, limits)) |
| 162 | + ifelse(!is.na(scaled), scaled, self$na.value) |
| 163 | + }, |
| 164 | + dimension = function(self, expand = c(0, 0)) c(-0.05, 1.05) |
| 165 | +) |
| 166 | + |
| 167 | +#' @noRd |
| 168 | +is_discrete <- function(x) is.factor(x) || is.character(x) || is.logical(x) |
| 169 | + |
| 170 | +#' @describeIn geom_mosaic |
| 171 | +#' Computes the coordinates for rectangles in a mosaic plot based |
| 172 | +#' on combinations of `x` and `fill` variables. |
| 173 | +#' For each unique `x` and `fill`, calculates the proportional |
| 174 | +#' widths and heights, stacking rectangles within each `x` group. |
| 175 | +#' |
| 176 | +#' ### Value |
| 177 | +#' |
| 178 | +#' A data frame with columns: `x`, `fill`, `xmin`, `xmax`, `ymin`, `ymax`, |
| 179 | +#' representing the position and size of each rectangle. |
| 180 | +#' |
| 181 | +#' @keywords internal |
| 182 | +.calculate_coordinates <- function(data) { |
| 183 | + # Example: compute rectangles from x and y |
| 184 | + result <- data |> |
| 185 | + # Count combinations of X and Y |
| 186 | + dplyr::count(.data$x, .data$fill, .drop = FALSE) |> |
| 187 | + # Compute total for each X group |
| 188 | + dplyr::mutate( |
| 189 | + .by = .data$x, |
| 190 | + x_total = sum(.data$n), |
| 191 | + prop = .data$n / .data$x_total, |
| 192 | + prop = dplyr::if_else(is.nan(.data$prop), 0, .data$prop) |
| 193 | + ) |> |
| 194 | + dplyr::arrange(dplyr::desc(.data$x_total), .data$x, .data$fill) |> |
| 195 | + # Compute total sample size to turn counts into widths |
| 196 | + dplyr::mutate( |
| 197 | + N_total = dplyr::n(), |
| 198 | + x_width = .data$x_total / .data$N_total |
| 199 | + ) |> |
| 200 | + # Convert counts to x widths |
| 201 | + dplyr::mutate( |
| 202 | + .by = .data$x, |
| 203 | + x_width_last = dplyr::if_else(dplyr::row_number() == dplyr::n(), .data$x_width, 0) |
| 204 | + ) |> |
| 205 | + # Compute x-min/x-max for each group |
| 206 | + dplyr::mutate( |
| 207 | + xmin = cumsum(dplyr::lag(.data$x_width_last, default = 0)), |
| 208 | + xmax = .data$xmin + .data$x_width |
| 209 | + ) |> |
| 210 | + # Compute y-min/y-max for stacked proportions |
| 211 | + dplyr::mutate( |
| 212 | + .by = .data$x, |
| 213 | + ymin = c(0, utils::head(cumsum(.data$prop), -1)), |
| 214 | + ymax = cumsum(.data$prop) |
| 215 | + ) |> |
| 216 | + dplyr::mutate( |
| 217 | + xmin = .data$xmin / max(.data$xmax), |
| 218 | + xmax = .data$xmax / max(.data$xmax), |
| 219 | + xmin = dplyr::if_else(.data$n == 0, 0, .data$xmin + 0.005), |
| 220 | + xmax = dplyr::if_else(.data$n == 0, 0, .data$xmax - 0.005), |
| 221 | + ymin = dplyr::if_else(.data$n == 0, 0, .data$ymin + 0.005), |
| 222 | + ymax = dplyr::if_else(.data$n == 0, 0, .data$ymax - 0.005) |
| 223 | + ) |> |
| 224 | + dplyr::select(.data$x, .data$fill, .data$xmin, .data$xmax, .data$ymin, .data$ymax, .n = .data$n) |
| 225 | + result |
| 226 | +} |
0 commit comments