diff --git a/DESCRIPTION b/DESCRIPTION index 3e4e9af..deb61af 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ggsankeyfier Type: Package Title: Create Sankey and Alluvial Diagrams Using 'ggplot2' -Version: 0.1.8.0008 +Version: 0.1.8.0010 Authors@R: c(person("Pepijn", "de Vries", role = c("aut", "cre", "dtc"), email = "pepijn.devries@outlook.com", comment = c(ORCID = "0000-0002-7961-6646")), @@ -38,6 +38,7 @@ Imports: vwline Suggests: knitr, + ragg, rmarkdown, stringr, svglite, diff --git a/NEWS.md b/NEWS.md index 58973c7..1e9f04e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,7 @@ -ggsankeyfier v0.1.8.0008 +ggsankeyfier v0.1.8.0010 ------------- + * Added better stacking order features * Added check workflow * Added code coverage workflow and badge * Improved test coverage diff --git a/R/draw_edges.r b/R/draw_edges.r index 62814b1..d06d064 100644 --- a/R/draw_edges.r +++ b/R/draw_edges.r @@ -4,7 +4,7 @@ data <- data |> dplyr::mutate( res = { - resolution <- .data[["xend"]] - .data[["x"]] + resolution <- .data$xend - .data$x resolution[resolution < 0 & resolution > -1] <- -1 resolution[is.na(resolution)] <- 0 resolution[abs(resolution) < 0.25] <- @@ -22,7 +22,7 @@ # Note that edge_end_size is currently ignored. data$edge_size <- coord$transform( - dplyr::bind_cols(x = 0, y = data[["edge_size"]]), panel_params)[["y"]] - + dplyr::bind_cols(x = 0, y = data$edge_size), panel_params)[["y"]] - coord$transform( dplyr::bind_cols(x = 0, y = rep(0, nrow(data))), panel_params)[["y"]] # For very narrow edges take a small value, to avoid errors (it will not be visible to @@ -98,14 +98,14 @@ ) } }, - x = .data[["x"]], y = .data[["y"]], xend = .data[["xend"]], - yend = .data[["yend"]], y_size = .data[["edge_size"]], slope = .data[["slope"]], - ncp = .data[["ncp"]], - fill = .data[["fill"]], colour = .data[["colour"]], linetype = .data[["linetype"]], - linewidth = .data[["linewidth"]], - alpha = .data[["alpha"]], waist = .data[["waist"]], res = .data[["res"]], - connector = .data[["connector"]], - SIMPLIFY = F) + x = .data$x, y = .data$y, xend = .data$xend, + yend = .data$yend, y_size = .data$edge_size, slope = .data$slope, + ncp = .data$ncp, + fill = .data$fill, colour = .data$colour, linetype = .data$linetype, + linewidth = .data$linewidth, + alpha = .data$alpha, waist = .data$waist, res = .data$res, + connector = .data$connector, + SIMPLIFY = FALSE) ) |> dplyr::pull("bez") return(do.call(grid::gList, result)) diff --git a/R/ecosystem_services.r b/R/ecosystem_services.r index 0f89bef..6610cf6 100644 --- a/R/ecosystem_services.r +++ b/R/ecosystem_services.r @@ -2,7 +2,7 @@ #' #' Data indicating a risk resulting from anthropological activities to the #' marine ecosystem and its capacity to supply services. -#' This data set serves (aggregated from Piet _et al._ (submitted)) as an example +#' This data set serves (aggregated from Piet _et al._ (2024)) as an example #' to illustrate the package's features. #' @format `ecosystem_services` is a `data.frame` with #' `r nrow(ecosystem_services)` rows and `r ncol(ecosystem_services)` columns. @@ -36,9 +36,10 @@ #' for additional decoration of a Sankey diagram. It is therefore more detailed than #' the first alternative. #' @references -#' Piet GJ, Bentley J, Jongbloed RH, Grundlehner A, Tamis JE, De Vries P -#' (submitted) A Cumulative Impact Assessment on the North Sea Capacity to -#' Supply Ecosystem Services. \doi{10.2139/ssrn.4450241} +#' Piet GJ, Bentley JW, Jongbloed RH, Grundlehner A, Tamis JE, De Vries P (2024) +#' A Cumulative Impact Assessment on the North Sea Capacity to Supply Ecosystem +#' Services. Science of The Total Environment (498) +#' [DOI:10.1016/j.scitotenv.2024.174149](https://doi.org/10.1016/j.scitotenv.2024.174149) #' @docType data #' @author Pepijn de Vries, Gerjan Piet, Jacob Bentley, Ruud Jongbloed, Anne Grundlehner, Jacqueline Tamis #' @examples diff --git a/R/geom_edge.r b/R/geom_edge.r index 963d6e7..382c7ad 100644 --- a/R/geom_edge.r +++ b/R/geom_edge.r @@ -70,7 +70,7 @@ geom_sankeyedge <- position = "sankey", na.rm = FALSE, show.legend = NA, slope = 0.5, ncp = 100, width = "auto", align = c("bottom", "top", "center", "justify"), - order = c("ascending", "descending", "as_is"), + order = c("ascending", "descending", "ascending+", "descending+", "as_is"), h_space = "auto", v_space = 0, nudge_x = 0, nudge_y = 0, split_nodes = FALSE, split_tol = 1e-3, diff --git a/R/geom_node.r b/R/geom_node.r index 421ed46..5457823 100644 --- a/R/geom_node.r +++ b/R/geom_node.r @@ -50,7 +50,7 @@ geom_sankeynode <- stat = "sankeynode", position = "sankey", na.rm = FALSE, show.legend = NA, width = "auto", align = c("bottom", "top", "center", "justify"), - order = c("ascending", "descending", "as_is"), + order = c("ascending", "descending", "ascending+", "descending+", "as_is"), h_space = "auto", v_space = 0, nudge_x = 0, nudge_y = 0, split_nodes = FALSE, split_tol = 1e-3, diff --git a/R/geom_segment.r b/R/geom_segment.r index 610e784..35be9d8 100644 --- a/R/geom_segment.r +++ b/R/geom_segment.r @@ -19,7 +19,7 @@ GeomSankeysegment <- geom_sankeysegment <- function(mapping = NULL, data = NULL, stat = "sankeyedge", position = "sankey", na.rm = FALSE, show.legend = NA, - order = c("ascending", "descending", "as_is"), + order = c("ascending", "descending", "ascending+", "descending+", "as_is"), width = "auto", align = c("bottom", "top", "center", "justify"), h_space = "auto", v_space = 0, nudge_x = 0, nudge_y = 0, diff --git a/R/pivot_stages.r b/R/pivot_stages.r index 9dcf2fd..3499845 100644 --- a/R/pivot_stages.r +++ b/R/pivot_stages.r @@ -65,7 +65,7 @@ pivot_stages_longer <- result <- data |> dplyr::ungroup() |> - dplyr::select(union(union(stages_from, values_from), additional_aes_from)) |> + dplyr::select(dplyr::any_of(union(union(stages_from, values_from), additional_aes_from))) |> dplyr::mutate(dplyr::across(dplyr::any_of(stages_from), ~ { if (is.factor(.)) . else { factor(., unique(.)) @@ -79,10 +79,10 @@ pivot_stages_longer <- result <- lapply(utils::head(seq_along(stages_from), -1), function(i) { result |> - dplyr::select(stages_from[i + (0:1)]) |> + dplyr::select(dplyr::any_of(stages_from[i + (0:1)])) |> dplyr::rename(node_from = stages_from[i], node_to = stages_from[i + 1]) |> dplyr::bind_cols( - result |> dplyr::select(union(values_from, additional_aes_from)) + result |> dplyr::select(dplyr::any_of(union(values_from, additional_aes_from))) ) |> dplyr::group_by(dplyr::across(dplyr::any_of(c("node_from", "node_to", additional_aes_from)))) |> dplyr::summarise( @@ -102,7 +102,7 @@ pivot_stages_longer <- tidyr::nest(to = c("node", "stage")) |> tidyr::pivot_longer(c("from", "to"), names_to = "connector") |> tidyr::unnest("value") |> - dplyr::mutate(node = factor(as.character(.data[["node"]]), lvls)) + dplyr::mutate(node = factor(as.character(.data$node), lvls)) return(result) } diff --git a/R/position.r b/R/position.r index c1c867b..75be740 100644 --- a/R/position.r +++ b/R/position.r @@ -17,7 +17,18 @@ #' nodes and edges in a plot. #' Should be one of: `ascending` (default), sorts nodes and edges from large to small #' (largest on top); `descending` sorts nodes and edges from small to large (smallest -#' on top); `as_is` will leave the order of nodes and edges as they are in `data`. +#' on top); `ascending+` Same as `ascending` but it also arranges edges and nodes +#' by its aesthetics; `descending+` Same as `descending` but it also +#' arranges edges/nodes by its aesthetics; +#' `as_is` will leave the order of nodes and edges as they are in `data`. +#' +#' You can also provide a custom function to control the stacking order of nodes and +#' edges. The function needs to accept one argument (`data`) which can be either +#' nodes or edges data. The function needs to add a column named `node_order` containing +#' numbers by which the nodes need to be ordered. In case of edges you need to add +#' two columns. One named `edge_order`, controlling the order of outgoing edges, +#' and one named `edge_order_end` controlling the order of incoming edges. +#' For more details see `vignette("stacking_order")`. #' @param h_space Horizontal space between split nodes (`numeric`). This argument is #' ignored when `split_nodes == FALSE`. Use `"auto"` to automatically position split nodes. #' @param v_space Vertical space between nodes (`numeric`). When set to zero (`0`), @@ -88,12 +99,12 @@ PositionSankey <- #' @export position_sankey <- function(width = "auto", align = c("bottom", "top", "center", "justify"), - order = c("ascending", "descending", "as_is"), + order = c("ascending", "descending", "ascending+", "descending+", "as_is"), h_space = "auto", v_space = 0, nudge_x = 0, nudge_y = 0, split_nodes = FALSE, split_tol = 1e-3, direction = c("forward", "backward"), ...) { + if (is.character(order)) order <- rlang::arg_match(order) align <- rlang::arg_match(align) - order <- rlang::arg_match(order) direction <- rlang::arg_match(direction) ggplot2::ggproto( diff --git a/R/position_helpers.r b/R/position_helpers.r index 7b1d357..5c28d64 100644 --- a/R/position_helpers.r +++ b/R/position_helpers.r @@ -5,10 +5,10 @@ data <- dplyr::bind_rows( data |> - dplyr::select(c("PANEL", "node_id", "x", "y")) |> + dplyr::select(dplyr::any_of(c("PANEL", "node_id", "x", "y"))) |> dplyr::mutate(connector = "from"), data |> - dplyr::select(c("PANEL", node_id = "node_id_end", x = "xend", y = "yend")) |> + dplyr::select(dplyr::any_of(c("PANEL", node_id = "node_id_end", x = "xend", y = "yend"))) |> dplyr::mutate(connector = "to") ) } @@ -21,26 +21,17 @@ dplyr::summarise(max_size = sum(.data$node_size), n_nodes = dplyr::n()) - } else if (!"node_size" %in% names(data)) { - dplyr::bind_rows( - data |> dplyr::select(c("PANEL", "x", "node_id", - node_size = "y_node_size")), - data |> dplyr::select(c("PANEL", x = "xend", node_id = "node_id_to", - node_size = "yend_node_size")) - ) |> - dplyr::distinct() |> - .group_across("PANEL", "x") |> - dplyr::summarise(n_nodes = dplyr::n(), max_size = sum(.data[["node_size"]])) |> - dplyr::ungroup() } else { + data |> - dplyr::select(c("PANEL", "x", "connector", "node_id", "node_size")) |> + dplyr::select(dplyr::any_of(c("PANEL", "x", "connector", "node_id", "node_size"))) |> dplyr::distinct() |> .group_across("PANEL", "x", "node_id") |> - dplyr::summarise(node_size = max(.data[["node_size"]])) |> + dplyr::summarise(node_size = max(.data$node_size)) |> .group_across("PANEL", "x") |> - dplyr::summarise(n_nodes = dplyr::n(), max_size = sum(.data[["node_size"]])) |> + dplyr::summarise(n_nodes = dplyr::n(), max_size = sum(.data$node_size)) |> dplyr::ungroup() + } } @@ -94,34 +85,41 @@ .compute_layer_edge_positions(self, data, params, scales) } else { .compute_layer_node_positions(self, data, params, scales) |> - dplyr::filter(!.data[["duplicated"]]) + dplyr::filter(!.data$duplicated) } } -.order_edges <- function(data, order, which = "start") { - data <- data |> - .group_across("PANEL", - ifelse(which == "start", "x_raw", "xend_raw"), - ifelse(which == "start", "group", "group_end")) |> - dplyr::mutate(temp = { - if (order == "ascending") { - dplyr::dense_rank(if (which == "start") .data[["y"]] else .data[["yend"]]) - } else if (order == "descending") { - dplyr::dense_rank(if (which == "start") -.data[["y"]] else -.data[["yend"]]) - } else { - seq_len(dplyr::n()) - } - }) |> - dplyr::ungroup() - if (which == "start") { - data <- data |> - dplyr::arrange(.data[["PANEL"]], .data[["x_raw"]], .data[["group"]], .data[["temp"]]) |> - dplyr::rename(edge_order = "temp") +.order_objects <- function(data, order) { + order_aes <- endsWith(order, "+") + order <- gsub("\\+$", "", order) + if ("edge_id" %in% names(data)) { + .order_edges(data, order, "start", order_aes) |> + .order_edges(order, "end", order_aes) } else { + .order_nodes(data, order) + } +} + +.order_edges <- function(data, order, which = "start", order_by_aes = FALSE) { + if (order_by_aes) { + extra_sort <- GeomSankeyedge$aesthetics() + extra_sort <- + extra_sort[!extra_sort %in% c("x", "y", "group", "connector", "edge_id")] + extra_sort <- names(data)[names(data) %in% extra_sort] + } else extra_sort <- NULL + + xr <- ifelse(which == "start", "x_raw", "xend_raw") + y_ <- ifelse(which == "start", "y", "yend") + gr <- ifelse(which == "start", "group", "group_end") + if (order != "as_is") { data <- data |> - dplyr::arrange(.data[["PANEL"]], .data[["xend_raw"]], .data[["group_end"]], .data[["temp"]]) |> - dplyr::rename(edge_order_end = "temp") + dplyr::arrange(dplyr::across(dplyr::any_of(c("PANEL", xr, gr, extra_sort, y_)))) } + data <- data |> + dplyr::ungroup() |> + dplyr::mutate(temp = dplyr::row_number() * ifelse(order == "ascending", 1, -1)) + if (which == "start") data <- dplyr::rename(data, edge_order = "temp") + if (which == "end") data <- dplyr::rename(data, edge_order_end = "temp") return(data) } @@ -131,79 +129,87 @@ dplyr::mutate( node_order = { if(order == "ascending") { - dplyr::row_number(.data[["align_offset"]]) + dplyr::row_number(.data$y) } else if (order == "descending") { - dplyr::row_number(-.data[["align_offset"]]) + dplyr::row_number(-.data$y) } else { - seq_len(dplyr::n()) + dplyr::row_number() } } ) |> - dplyr::arrange(.data[["PANEL"]], .data[["x"]], .data[["node_order"]]) + dplyr::ungroup() } .compute_layer_node_positions <- function(self, data, params, scales) { if (missing(params)) params <- .setup_params_position(self, data) data <- data |> .add_node_id() + + if (is.character(params$order)) + order_fun <- \(x) .order_objects(x, params$order) else + if (is.function(params$order)) order_fun <- params$order + rhs <- .group_across(data, "PANEL", "x", "group") |> - dplyr::summarise(align_offset = max(.data[["y"]]), .groups = "keep") |> - .order_nodes(params$order) + dplyr::summarise(y = max(.data$y), .groups = "keep") |> + order_fun() data |> dplyr::left_join(.stage_params(data, params), "x") |> - dplyr::left_join(rhs |> - .group_across("PANEL", "x") |> - dplyr::mutate(y_cum = cumsum(.data[["align_offset"]]) - - .data[["align_offset"]]/2, - n_nodes = dplyr::n_distinct(.data[["group"]]), - ytot = sum(.data[["align_offset"]]), - align_offset = .data[["ytot"]] + - (.data[["n_nodes"]] - 1)*params$v_space), - c("PANEL", "x", "group")) |> + dplyr::left_join( + rhs |> + .group_across("PANEL", "x") |> + dplyr::arrange(.data$node_order) |> + dplyr::mutate(y_cum = cumsum(.data$y) - + .data$y/2, + n_nodes = dplyr::n_distinct(.data$group), + ytot = sum(.data$y), + align_offset = .data$ytot + + (.data$n_nodes - 1)*params$v_space), + c("PANEL", "x", "group")) |> + dplyr::arrange(.data$node_order) |> dplyr::ungroup() |> dplyr::mutate( - y = .data[["y_cum"]], - ymin = .data[["y"]] - .data[["node_size"]]/2, - ymax = .data[["y"]] + .data[["node_size"]]/2, - xmin = .data[["x"]], - xmax = .data[["x"]]) |> - dplyr::select(-"y_cum") |> + y = .data$y_cum, + ymin = .data$y - .data$node_size/2, + ymax = .data$y + .data$node_size/2, + xmin = .data$x, + xmax = .data$x) |> + dplyr::select(!dplyr::any_of("y_cum")) |> .group_across("PANEL") |> dplyr::mutate( v_space = if (params$align == "justify") { - (max(.data[["align_offset"]]) - .data[["ytot"]])/ifelse(.data[["n_nodes"]] > 1, - .data[["n_nodes"]] - 1, 1) + (max(.data$align_offset) - .data$ytot)/ifelse(.data$n_nodes > 1, + .data$n_nodes - 1, 1) } else params$v_space, align_offset = switch( params$align, bottom = 0, - top = max(.data[["align_offset"]]) - .data[["align_offset"]], - center = (max(.data[["align_offset"]]) - .data[["align_offset"]])/2, + top = max(.data$align_offset) - .data$align_offset, + center = (max(.data$align_offset) - .data$align_offset)/2, justify = 0, 0) ) |> .group_across("PANEL", "x", "node_id") |> dplyr::mutate( - dissimilar = if (dplyr::n() == 1 || max(.data[["node_size"]]) == 0) FALSE else - (max(abs(diff(.data[["node_size"]])))/max(.data[["node_size"]])) > .data$split_tol[[1]], - split = .data$split_nodes[[1]] | .data[["dissimilar"]], - duplicated = !.data[["split"]] & duplicated(.data[["node_id"]]) & !.data[["dissimilar"]], - v_space = max(.data[["v_space"]]) + dissimilar = if (dplyr::n() == 1 || max(.data$node_size) == 0) FALSE else + (max(abs(diff(.data$node_size)))/max(.data$node_size)) > .data$split_tol[[1]], + split = .data$split_nodes[[1]] | .data$dissimilar, + duplicated = !.data$split & duplicated(.data$node_id) & !.data$dissimilar, + v_space = max(.data$v_space) ) |> .group_across("PANEL", "x", "connector") |> dplyr::mutate( - y_offset = (.data[["node_order"]] - 1)*.data[["v_space"]][[1]] + - .data[["align_offset"]], - x_offset = ifelse(.data[["split"]], .data[["h_space"]]* - ifelse(.data[["connector"]] == "from", .5, -.5), 0), - y = .data[["y"]] + .data[["y_offset"]] + params$nudge_y, - ymin = .data[["ymin"]] + .data[["y_offset"]] + params$nudge_y, - ymax = .data[["ymax"]] + .data[["y_offset"]] + params$nudge_y, - x = .data[["x"]] + .data[["x_offset"]] + params$nudge_x, - xmin = .data[["xmin"]] - .data[["width"]]/ifelse(.data[["split"]], 4, 2) + - .data[["x_offset"]] + params$nudge_x, - xmax = .data[["xmax"]] + .data[["width"]]/ifelse(.data[["split"]], 4, 2) + - .data[["x_offset"]] + params$nudge_x + y_offset = (rank(.data$node_order, ties.method = "first") - 1)*.data$v_space[[1]] + + .data$align_offset, + x_offset = ifelse(.data$split, .data$h_space* + ifelse(.data$connector == "from", .5, -.5), 0), + y = .data$y + .data$y_offset + params$nudge_y, + ymin = .data$ymin + .data$y_offset + params$nudge_y, + ymax = .data$ymax + .data$y_offset + params$nudge_y, + x = .data$x + .data$x_offset + params$nudge_x, + xmin = .data$xmin - .data$width/ifelse(.data$split, 4, 2) + + .data$x_offset + params$nudge_x, + xmax = .data$xmax + .data$width/ifelse(.data$split, 4, 2) + + .data$x_offset + params$nudge_x ) } @@ -230,22 +236,26 @@ function(self, data, params, scales) { params <- .setup_params_position(self, data) + if (is.character(params$order)) + order_fun <- \(x) .order_objects(x, params$order) else + if (is.function(params$order)) order_fun <- params$order + nodes <- dplyr::bind_rows( data |> - dplyr::select(c("PANEL", "x", "y", "group", "edge_id")) |> + dplyr::select(dplyr::any_of(c("PANEL", "x", "y", "group", "edge_id"))) |> dplyr::mutate(connector = "from"), data |> - dplyr::select(c("PANEL", x = "xend", y = "yend", group = "group_to", "edge_id")) |> + dplyr::select(dplyr::any_of(c("PANEL", x = "xend", y = "yend", group = "group_to", "edge_id"))) |> dplyr::mutate(connector = "to") ) |> - dplyr::filter(!(is.na(.data[["x"]]) & is.na(.data[["y"]]))) + dplyr::filter(!(is.na(.data$x) & is.na(.data$y))) nodes <- .compute_panel_statnodes(self, nodes, params, scales) nodes <- .compute_layer_node_positions(self, nodes, params) |> dplyr::ungroup() |> - dplyr::mutate(x_node = .data[["x"]], x_raw = .data[["x"]] - .data[["x_offset"]]) |> + dplyr::mutate(x_node = .data$x, x_raw = .data$x - .data$x_offset) |> tidyr::unnest("edge_id") |> - dplyr::select(c("PANEL", "connector", "edge_id", "x_node", "x_raw", "split", "width", - y_node = "y", y_node_size = "node_size")) + dplyr::select(dplyr::any_of(c("PANEL", "connector", "edge_id", "x_node", "x_raw", "split", "width", + y_node = "y", y_node_size = "node_size"))) data |> dplyr::rename_with(~gsub("_to$", "_end", .), dplyr::ends_with("_to")) |> @@ -253,23 +263,24 @@ dplyr::left_join(nodes, by = c("PANEL", "connector", "edge_id")) |> dplyr::left_join( nodes |> - dplyr::filter(.data[["connector"]] == "to") |> + dplyr::filter(.data$connector == "to") |> dplyr::mutate(connector = "from") |> dplyr::rename(xend_node = "x_node", xend_raw = "x_raw", splitend = "split", widthend = "width", yend_node = "y_node", yend_node_size = "y_node_size"), by = c("PANEL", "connector", "edge_id")) |> - .order_edges(params$order, "start") |> + order_fun() |> + dplyr::arrange(.data$edge_order) |> .group_across("PANEL", "x", "group") |> dplyr::mutate( - edge_size = .data[["y"]], - x = .data[["x_node"]] + .data[["width"]]/ifelse(.data[["split"]], 4, 2), - y = cumsum(.data[["y"]]) - .data[["y"]]/2 + .data[["y_node"]] - .data[["y_node_size"]]/2) |> - .order_edges(params$order, "end") |> + edge_size = .data$y, + x = .data$x_node + .data$width/ifelse(.data$split, 4, 2), + y = cumsum(.data$y) - .data$y/2 + .data$y_node - .data$y_node_size/2) |> + dplyr::arrange(.data$edge_order_end) |> .group_across("PANEL", "xend", "group_end") |> dplyr::mutate( - edge_end_size = .data[["yend"]], - xend = .data[["xend_node"]] - .data[["widthend"]]/ifelse(.data[["splitend"]], 4, 2), - yend = cumsum(.data[["yend"]]) - .data[["yend"]]/2 + .data[["yend_node"]] - .data[["yend_node_size"]]/2) |> + edge_end_size = .data$yend, + xend = .data$xend_node - .data$widthend/ifelse(.data$splitend, 4, 2), + yend = cumsum(.data$yend) - .data$yend/2 + .data$yend_node - .data$yend_node_size/2) |> .swap_ends_if(params$direction == "backward") } diff --git a/R/stat_node_helpers.r b/R/stat_node_helpers.r index 2ad6835..fb20aaa 100644 --- a/R/stat_node_helpers.r +++ b/R/stat_node_helpers.r @@ -6,13 +6,13 @@ .add_node_id() |> .group_across("PANEL", "x", "group", "connector", "node_id") |> dplyr::summarise( - dplyr::across(-dplyr::any_of(c("y", "edge_id")), ~{ + dplyr::across(!dplyr::any_of(c("y", "edge_id")), ~{ if (is.numeric(.)) sum(.) else .[[1]] }), - edge_id = list(.data[["edge_id"]]), - y = sum(.data[["y"]]), .groups = "keep") |> + edge_id = list(.data$edge_id), + y = sum(.data$y), .groups = "keep") |> .group_across("PANEL", "x", "group") |> - dplyr::mutate(node_size = .data[["y"]]) |> + dplyr::mutate(node_size = .data$y) |> dplyr::ungroup() } diff --git a/README.Rmd b/README.Rmd index 76f9fb7..3e1b786 100644 --- a/README.Rmd +++ b/README.Rmd @@ -145,7 +145,7 @@ The package gives you much control on the positioning of elements in the diagram * introducing a horizontal split in nodes * stacking order of nodes and edges -`vignette("positioning")` will show you how. +`vignette("positioning")` and `vignette("stacking_order")` will show you how. ### Decorating nodes and edges @@ -176,6 +176,7 @@ Dutch Ministry of Agriculture, Nature and Food Quality ## Resources - * Piet GJ, Jongbloed RH, Bentley JW, Grundlehner A, Tamis JE, De Vries P (_in prep._) - A Cumulative Impact Assessment on the North Sea Capacity to Supply Ecosystem Services - [DOI:10.2139/ssrn.4760674](http://dx.doi.org/10.2139/ssrn.4760674) + * Piet GJ, Bentley JW, Jongbloed RH, Grundlehner A, Tamis JE, De Vries P (2024) + A Cumulative Impact Assessment on the North Sea Capacity to Supply Ecosystem Services. + Science of The Total Environment (498) + [DOI:10.1016/j.scitotenv.2024.174149](https://doi.org/10.1016/j.scitotenv.2024.174149) diff --git a/README.md b/README.md index d7f5202..b1e8769 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,8 @@ diagram. Think of: - introducing a horizontal split in nodes - stacking order of nodes and edges -`vignette("positioning")` will show you how. +`vignette("positioning")` and `vignette("stacking_order")` will show you +how. ### Decorating nodes and edges @@ -187,7 +188,7 @@ from the Dutch Ministry of Agriculture, Nature and Food Quality ## Resources -- Piet GJ, Jongbloed RH, Bentley JW, Grundlehner A, Tamis JE, De Vries P - (*in prep.*) A Cumulative Impact Assessment on the North Sea Capacity - to Supply Ecosystem Services - [DOI:10.2139/ssrn.4760674](http://dx.doi.org/10.2139/ssrn.4760674) +- Piet GJ, Jongbloed RH, Bentley JW, Grundlehner A, Tamis JE, De Vries + P (2024) A Cumulative Impact Assessment on the North Sea Capacity to + Supply Ecosystem Services. Science of The Total Environment (498) + [DOI:10.1016/j.scitotenv.2024.174149](https://doi.org/10.1016/j.scitotenv.2024.174149) diff --git a/cran-comments.md b/cran-comments.md index 92c2f45..c336165 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,41 +1,5 @@ -## Second submission - -My apologies for the oversight. This is now fixed. - - > Found the following (possibly) invalid URLs: - URL: [https:://doi.org/10.2139/ssrn.4450241]https:://doi.org/10.2139/ssrn.4450241 - From: inst/doc/data_management.html - Status: Error - Message: URL rejected: Port number was not a decimal number - between 0 and 65535 - > Indeed, there is a doubled colon in "https::" - > Pls write this as https://doi.org/10.2139/ssrn.4450241 - > Please fix and resubmit. - -## Test environments - - * local Windows 10 install, R 4.3.2 - * winbuilder - * rhub check_for_cran - - > Possibly misspelled words in DESCRIPTION: - Sankey (3:15, 24:14) - -This is a common name for a particular type of -diagrams, see: - - - > Found the following (possibly) invalid URLs: - URL: https:://doi.org/10.2139/ssrn.4450241 - From: inst/doc/data_management.html - Status: Error - Message: URL rejected: Port number was not a decimal number between 0 and 65535 - -This is most likely a temporary issue on the DOI -server. The URL does work in a web browser. - ## R CMD check results -0 errors | 0 warnings | 1 note +0 errors | 0 warnings | 0 notes -* This is a new release. +* Release version TODO diff --git a/man/ecosystem_services.Rd b/man/ecosystem_services.Rd index 3779364..8fe04ae 100644 --- a/man/ecosystem_services.Rd +++ b/man/ecosystem_services.Rd @@ -68,7 +68,7 @@ a Sankey diagram. \description{ Data indicating a risk resulting from anthropological activities to the marine ecosystem and its capacity to supply services. -This data set serves (aggregated from Piet \emph{et al.} (submitted)) as an example +This data set serves (aggregated from Piet \emph{et al.} (2024)) as an example to illustrate the package's features. } \examples{ @@ -96,9 +96,10 @@ if (requireNamespace("stringr")) { } } \references{ -Piet GJ, Bentley J, Jongbloed RH, Grundlehner A, Tamis JE, De Vries P -(submitted) A Cumulative Impact Assessment on the North Sea Capacity to -Supply Ecosystem Services. \doi{10.2139/ssrn.4450241} +Piet GJ, Bentley JW, Jongbloed RH, Grundlehner A, Tamis JE, De Vries P (2024) +A Cumulative Impact Assessment on the North Sea Capacity to Supply Ecosystem +Services. Science of The Total Environment (498) +\href{https://doi.org/10.1016/j.scitotenv.2024.174149}{DOI:10.1016/j.scitotenv.2024.174149} } \author{ Pepijn de Vries, Gerjan Piet, Jacob Bentley, Ruud Jongbloed, Anne Grundlehner, Jacqueline Tamis diff --git a/man/figures/README-general_illustration-1.svg b/man/figures/README-general_illustration-1.svg index 9e87943..f7a10ab 100644 --- a/man/figures/README-general_illustration-1.svg +++ b/man/figures/README-general_illustration-1.svg @@ -222,21 +222,21 @@ - - + + + + + - + + + + - - - - - - - + diff --git a/man/geom_sankeyedge.Rd b/man/geom_sankeyedge.Rd index d2de0e5..b6fe870 100644 --- a/man/geom_sankeyedge.Rd +++ b/man/geom_sankeyedge.Rd @@ -22,7 +22,7 @@ geom_sankeysegment( position = "sankey", na.rm = FALSE, show.legend = NA, - order = c("ascending", "descending", "as_is"), + order = c("ascending", "descending", "ascending+", "descending+", "as_is"), width = "auto", align = c("bottom", "top", "center", "justify"), h_space = "auto", @@ -49,7 +49,7 @@ geom_sankeyedge( ncp = 100, width = "auto", align = c("bottom", "top", "center", "justify"), - order = c("ascending", "descending", "as_is"), + order = c("ascending", "descending", "ascending+", "descending+", "as_is"), h_space = "auto", v_space = 0, nudge_x = 0, @@ -121,7 +121,18 @@ display.} nodes and edges in a plot. Should be one of: \code{ascending} (default), sorts nodes and edges from large to small (largest on top); \code{descending} sorts nodes and edges from small to large (smallest -on top); \code{as_is} will leave the order of nodes and edges as they are in \code{data}.} +on top); \verb{ascending+} Same as \code{ascending} but it also arranges edges and nodes +by its aesthetics; \verb{descending+} Same as \code{descending} but it also +arranges edges/nodes by its aesthetics; +\code{as_is} will leave the order of nodes and edges as they are in \code{data}. + +You can also provide a custom function to control the stacking order of nodes and +edges. The function needs to accept one argument (\code{data}) which can be either +nodes or edges data. The function needs to add a column named \code{node_order} containing +numbers by which the nodes need to be ordered. In case of edges you need to add +two columns. One named \code{edge_order}, controlling the order of outgoing edges, +and one named \code{edge_order_end} controlling the order of incoming edges. +For more details see \code{vignette("stacking_order")}.} \item{width}{Width of the node (\code{numeric}). When \code{split_nodes} is set to \code{TRUE} each part of the split node will have half this width. Use \code{"auto"} to automatically diff --git a/man/geom_sankeynode.Rd b/man/geom_sankeynode.Rd index db4582d..33bc646 100644 --- a/man/geom_sankeynode.Rd +++ b/man/geom_sankeynode.Rd @@ -20,7 +20,7 @@ geom_sankeynode( show.legend = NA, width = "auto", align = c("bottom", "top", "center", "justify"), - order = c("ascending", "descending", "as_is"), + order = c("ascending", "descending", "ascending+", "descending+", "as_is"), h_space = "auto", v_space = 0, nudge_x = 0, @@ -99,7 +99,18 @@ It can be any of \code{"top"}, \code{"bottom"}, \code{"center"} or \code{"justif nodes and edges in a plot. Should be one of: \code{ascending} (default), sorts nodes and edges from large to small (largest on top); \code{descending} sorts nodes and edges from small to large (smallest -on top); \code{as_is} will leave the order of nodes and edges as they are in \code{data}.} +on top); \verb{ascending+} Same as \code{ascending} but it also arranges edges and nodes +by its aesthetics; \verb{descending+} Same as \code{descending} but it also +arranges edges/nodes by its aesthetics; +\code{as_is} will leave the order of nodes and edges as they are in \code{data}. + +You can also provide a custom function to control the stacking order of nodes and +edges. The function needs to accept one argument (\code{data}) which can be either +nodes or edges data. The function needs to add a column named \code{node_order} containing +numbers by which the nodes need to be ordered. In case of edges you need to add +two columns. One named \code{edge_order}, controlling the order of outgoing edges, +and one named \code{edge_order_end} controlling the order of incoming edges. +For more details see \code{vignette("stacking_order")}.} \item{h_space}{Horizontal space between split nodes (\code{numeric}). This argument is ignored when \code{split_nodes == FALSE}. Use \code{"auto"} to automatically position split nodes.} diff --git a/man/position_sankey.Rd b/man/position_sankey.Rd index 8aa9169..c13af21 100644 --- a/man/position_sankey.Rd +++ b/man/position_sankey.Rd @@ -14,7 +14,7 @@ PositionSankey position_sankey( width = "auto", align = c("bottom", "top", "center", "justify"), - order = c("ascending", "descending", "as_is"), + order = c("ascending", "descending", "ascending+", "descending+", "as_is"), h_space = "auto", v_space = 0, nudge_x = 0, @@ -37,7 +37,18 @@ It can be any of \code{"top"}, \code{"bottom"}, \code{"center"} or \code{"justif nodes and edges in a plot. Should be one of: \code{ascending} (default), sorts nodes and edges from large to small (largest on top); \code{descending} sorts nodes and edges from small to large (smallest -on top); \code{as_is} will leave the order of nodes and edges as they are in \code{data}.} +on top); \verb{ascending+} Same as \code{ascending} but it also arranges edges and nodes +by its aesthetics; \verb{descending+} Same as \code{descending} but it also +arranges edges/nodes by its aesthetics; +\code{as_is} will leave the order of nodes and edges as they are in \code{data}. + +You can also provide a custom function to control the stacking order of nodes and +edges. The function needs to accept one argument (\code{data}) which can be either +nodes or edges data. The function needs to add a column named \code{node_order} containing +numbers by which the nodes need to be ordered. In case of edges you need to add +two columns. One named \code{edge_order}, controlling the order of outgoing edges, +and one named \code{edge_order_end} controlling the order of incoming edges. +For more details see \code{vignette("stacking_order")}.} \item{h_space}{Horizontal space between split nodes (\code{numeric}). This argument is ignored when \code{split_nodes == FALSE}. Use \code{"auto"} to automatically position split nodes.} diff --git a/tests/testthat/_snaps/plots/ggsankeyfier-segment.svg b/tests/testthat/_snaps/plots/ggsankeyfier-segment.svg index 4ff98f1..59a0144 100644 --- a/tests/testthat/_snaps/plots/ggsankeyfier-segment.svg +++ b/tests/testthat/_snaps/plots/ggsankeyfier-segment.svg @@ -27,28 +27,28 @@ - - + + + + + + + - + + + + + - - - - - - - - - - - + + diff --git a/tests/testthat/_snaps/plots/ggsankeyfier-stack-ident.svg b/tests/testthat/_snaps/plots/ggsankeyfier-stack-ident.svg index d2d1623..7749178 100644 --- a/tests/testthat/_snaps/plots/ggsankeyfier-stack-ident.svg +++ b/tests/testthat/_snaps/plots/ggsankeyfier-stack-ident.svg @@ -28,12 +28,12 @@ - - - + + + diff --git a/tests/testthat/_snaps/plots/ggsankeyfier-stage-param.svg b/tests/testthat/_snaps/plots/ggsankeyfier-stage-param.svg index 04e869d..c8da3dd 100644 --- a/tests/testthat/_snaps/plots/ggsankeyfier-stage-param.svg +++ b/tests/testthat/_snaps/plots/ggsankeyfier-stage-param.svg @@ -27,28 +27,28 @@ - - + + + + + + + - + + + + + - - - - - - - - - - - + + diff --git a/tests/testthat/_snaps/plots/ggsankeyfier-waist.svg b/tests/testthat/_snaps/plots/ggsankeyfier-waist.svg index aa104a9..13fe695 100644 --- a/tests/testthat/_snaps/plots/ggsankeyfier-waist.svg +++ b/tests/testthat/_snaps/plots/ggsankeyfier-waist.svg @@ -27,28 +27,28 @@ - - + + + + + + + - + + + + + - - - - - - - - - - - + + diff --git a/tests/testthat/test_exceptions.R b/tests/testthat/test_exceptions.R new file mode 100644 index 0000000..87ee07d --- /dev/null +++ b/tests/testthat/test_exceptions.R @@ -0,0 +1,11 @@ +test_that("Either `position` or separate arguments are used, not both", { + expect_error({ + pos <- position_sankey(v_space = "auto") + + ggplot(ecosystem_services_pivot1, + aes(x = stage, y = RCSES, group = node, connector = connector, + edge_id = edge_id)) + + geom_sankeyedge(position = pos, v_space = 0) + + }) +}) diff --git a/tests/testthat/test_position.r b/tests/testthat/test_position.r index 045135c..8548dac 100644 --- a/tests/testthat/test_position.r +++ b/tests/testthat/test_position.r @@ -101,3 +101,59 @@ test_that( ggsankeyfier:::.node_summary(NULL, dat2) }) }) + +test_that("Layer positions are calculated", { + expect_no_error({ + p <- ggplot(es, + aes(x = stage, y = RCSES, group = node, connector = connector, + edge_id = edge_id)) + + geom_sankeyedge() + geom_sankeynode() + + cp <- StatSankeyedge$compute_panel( + data = p$data |> + dplyr::mutate(group = .data$node, + x = as.numeric(.data$stage), + y = .data$RCSES, + PANEL = 1) + ) + ggsankeyfier:::.compute_layer_positions(PositionSankey, cp) + }) +}) + +test_that("Ordering by aesthetics works", { + expect_no_error({ + pos <- position_sankey(v_space = "auto", order = "ascending+") + + p <- ggplot(es, + aes(x = stage, y = RCSES, group = node, connector = connector, + edge_id = edge_id)) + + geom_sankeyedge(position = pos) + geom_sankeynode(position = pos) + print(p) + }) +}) + +test_that("Custom ordering works", { + expect_no_error({ + + fun <- function(data) { + if ("edge_id" %in% names(data)) { + + data$edge_order_end <- data$edge_order <- 1 + + } else { + + data$node_order <- 1 + + } + return(data) + + } + pos <- position_sankey(v_space = "auto", order = fun) + + p <- ggplot(es, + aes(x = stage, y = RCSES, group = node, connector = connector, + edge_id = edge_id)) + + geom_sankeyedge(position = pos) + geom_sankeynode(position = pos) + print(p) + }) +}) diff --git a/vignettes/positioning.Rmd b/vignettes/positioning.Rmd index 60b3f7d..11a5711 100644 --- a/vignettes/positioning.Rmd +++ b/vignettes/positioning.Rmd @@ -159,13 +159,18 @@ Another aspect you might want to control in a Sankey diagram is the stacking ord nodes and edges. When `order = "as_is"`, the nodes will be stacked in the order of their levels (or order of appearance), the edges will be arranged in the order of `edge_id` (or their order of appearance). Other options are `order = "ascending"` and `order = "descending"`, both of which are -based on the `y` aesthetic. +based on the `y` aesthetic. For more details see the dedicated `vignette("stacking_order")`. -In order to demonstrate the stacking order we reduce the number of records from the example data -(i.e., only select the higher risk chains from the data). This will produce a less cluttered -Sankey diagram. +## Nudging + +The function `position_sankey()` can also be used to position text labels. +Let's use it to add labels to the nodes. Note that we are using a simple +`ggplot2::geom_text()` layer, where we provide `"sankeynode"` as `stat` function +and the `pos` object for positioning the labels. + +```{r text_no_nudge, results='hide', fig.width=6, fig.height=3} -```{r stack_order} +## Start with subsetting to a less cluttered data set es_sub <- ecosystem_services |> subset(RCSES > quantile(RCSES, 0.99)) |> @@ -175,32 +180,7 @@ es_sub <- p <- ggplot(es_sub, aes(x = stage, y = RCSES, group = node, connector = connector, edge_id = edge_id)) -``` - -This will plot the nodes and edges in ascending stacking order (largest at the top): - -```{r stack_order_asc, results='hide', fig.width=6, fig.height=3} -pos <- position_sankey(v_space = "auto", order = "ascending") -p + geom_sankeyedge(aes(fill = service_section), position = pos) + - geom_sankeynode(position = pos) -``` -This will plot the nodes and edges in desacending stacking order (largest at the bottom): - -```{r stack_order_des, results='hide', fig.width=6, fig.height=3} -pos <- position_sankey(v_space = "auto", order = "descending") -p + geom_sankeyedge(aes(fill = service_section), position = pos) + - geom_sankeynode(position = pos) -``` - -## Nudging - -The function `position_sankey()` can also be used to position text labels. -Let's use it to add labels to the nodes. Note that we are using a simple -`ggplot2::geom_text()` layer, where we provide `"sankeynode"` as `stat` function -and the `pos` object for positioning the labels. - -```{r text_no_nudge, results='hide', fig.width=6, fig.height=3} pos <- position_sankey(v_space = "auto", order = "descending") p + geom_sankeyedge(aes(fill = service_section), position = pos) + geom_sankeynode(position = pos) + diff --git a/vignettes/stacking_order.Rmd b/vignettes/stacking_order.Rmd new file mode 100644 index 0000000..b23d2f0 --- /dev/null +++ b/vignettes/stacking_order.Rmd @@ -0,0 +1,112 @@ +--- +title: "Stacking Order" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Stacking Order} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.ext = "png", + dev = "ragg_png" +) +ggplot2::theme_set(ggplot2::theme_light()) +``` + +## Small to Large (or Vice Versa) + +Let's start by setting up a simple plot to demonstrate stacking order options: + +```{r setup} +library(ggsankeyfier) +library(ggplot2) + +## Let's start with subsetting the data to make it less cluttered +es_sub <- + ecosystem_services |> + subset(RCSES > quantile(RCSES, 0.99)) |> + pivot_stages_longer(c("activity_realm", "biotic_realm", "service_section"), + "RCSES", "service_section") + +p <- ggplot(es_sub, + aes(x = stage, y = RCSES, group = node, connector = connector, + edge_id = edge_id)) +``` + +Using `position_sankey()`, a stacking order can be specified. Let's +start by demonstrating the ascending order (largest at the top): + +```{r stack_order_asc, results='hide', fig.width=6, fig.height=3} +pos <- position_sankey(v_space = "auto", order = "ascending") +p + geom_sankeyedge(aes(fill = service_section), position = pos) + + geom_sankeynode(position = pos) +``` + +This will plot the nodes and edges in descending stacking order (largest at the bottom): + +```{r stack_order_des, results='hide', fig.width=6, fig.height=3} +pos <- position_sankey(v_space = "auto", order = "descending") +p + geom_sankeyedge(aes(fill = service_section), position = pos) + + geom_sankeynode(position = pos) +``` + +## More Order Please + +Even though the nodes and edges are sorted by their size in the plot above, +it is still hard to read, as the coloured flow going to specific ecosystem +components can end up anywhere and don't align for incoming and outgoing +edges. This is where order options `ascending+` and `descending+` come in +handy. Before sorting the edges by size, it will first arrange them by its +aesthetics (in case of this example, the `fill` colour). Like so: + +```{r stack_order_plus, results='hide', fig.width=6, fig.height=3} +pos <- position_sankey(order = "descending+", v_space = "auto", align = "justify") + +p + geom_sankeyedge(aes(fill = service_section), position = pos) + + geom_sankeynode(position = pos) +``` + +As you will notice, the edges with the same fill colour now line up. + +## Give me the Power + +If all of this isn't enough, you can write your own ordering function, +giving you full power over the stacking order of edges and nodes. This +function should accept 1 argument: `data`. `position_sankey()` will call +this function with a `data.frame` containing either information about +nodes, or edges. Your custom function should return the same `data.frame`, +with extra information for the ordering. In case of nodes, the function should +add a column named `node_order`, in case of edges, two columns need to be added: +`edge_order` for outgoing flows, and `edge_order_end` for incoming flows. + +The example below shows how you can write such a function and how it affects +your plot. + +```{r stack_order_custom, results='hide', fig.width=6, fig.height=3} +## Definition of a custom ordering function: +custom_order <- function(data) { + + if ("edge_id" %in% names(data)) { # data contains edge info + + ## Order incoming edges from big to small + data$edge_order_end <- data$y + ## Order outgoing edges from small to big (note minus sign) + data$edge_order <- -data$y + + } else { ## data contains node info + + data$node_order <- data$y + + } + return(data) +} + +pos <- position_sankey(v_space = "auto", order = custom_order) + +p + geom_sankeyedge(aes(fill = service_section), position = pos) + + geom_sankeynode(position = pos) +```