diff --git a/DESCRIPTION b/DESCRIPTION index deb61af..f426ecb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ggsankeyfier Type: Package Title: Create Sankey and Alluvial Diagrams Using 'ggplot2' -Version: 0.1.8.0010 +Version: 0.1.8.0011 Authors@R: c(person("Pepijn", "de Vries", role = c("aut", "cre", "dtc"), email = "pepijn.devries@outlook.com", comment = c(ORCID = "0000-0002-7961-6646")), diff --git a/NEWS.md b/NEWS.md index 1e9f04e..2148177 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,12 @@ -ggsankeyfier v0.1.8.0010 +ggsankeyfier v0.1.8.0011 ------------- * Added better stacking order features + * Added `curve_weight` parameter to `geom_sankeyedge()` * Added check workflow * Added code coverage workflow and badge * Improved test coverage + * Expanded documentation * Fix for [issue #23](https://github.com/pepijn-devries/ggsankeyfier/issues/23) ggsankeyfier v0.1.8 diff --git a/R/draw_edges.r b/R/draw_edges.r index d06d064..4d22c45 100644 --- a/R/draw_edges.r +++ b/R/draw_edges.r @@ -46,7 +46,8 @@ dplyr::mutate( bez = mapply( - function(x, y, xend, yend, y_size, slope, ncp, fill, colour, linetype, linewidth, + function(x, y, xend, yend, y_size, slope, curve_weight, + ncp, fill, colour, linetype, linewidth, alpha, waist, res, connector) { gp <- grid::gpar(fill = fill, col = colour, lwd = linewidth*ggplot2::.pt, lty = linetype, alpha = alpha) @@ -90,7 +91,9 @@ ) } else { vwline::offsetBezierGrob( - x = grid::unit(c(x, x + slope2, xend - slope2, xend), "npc"), + x = grid::unit(c( + x, x + slope2 * (curve_weight + 0.5), + xend - slope2 * (1.5 - curve_weight), xend), "npc"), y = grid::unit(c(y, y, yend, yend), "npc"), w = grid::unit(c(1, waist, 1)*y_size, "npc")*asp_cor, stepFn = gridBezier::nSteps(ncp), @@ -99,7 +102,9 @@ } }, x = .data$x, y = .data$y, xend = .data$xend, - yend = .data$yend, y_size = .data$edge_size, slope = .data$slope, + yend = .data$yend, y_size = .data$edge_size, + slope = .data$slope, + curve_weight = .data$curve_weight, ncp 
= .data$ncp, fill = .data$fill, colour = .data$colour, linetype = .data$linetype, linewidth = .data$linewidth, diff --git a/R/geom_edge.r b/R/geom_edge.r index 382c7ad..c57ed88 100644 --- a/R/geom_edge.r +++ b/R/geom_edge.r @@ -24,6 +24,10 @@ #' @inheritParams ggplot2::geom_segment #' @param slope Slope parameter (`numeric`) for the Bezier curves used to depict the edges. #' Any value between 0 and 1 will work nicely. Other non-zero values will also work. +#' @param curve_weight Places weight on the Bezier curve. Values close to zero will +#' pull the inflection point of the curve towards outgoing nodes. Values close to one +#' will pull them towards incoming nodes. The default is 0.5, which will place the +#' inflection point exactly in the middle of the connecting nodes. #' @param ncp Number of control points on the Bezier curve that forms the edge. Larger #' numbers will result in smoother curves, but cost more computational time. Default is #' 100. @@ -49,17 +53,22 @@ GeomSankeyedge <- draw_panel = .draw_edges, setup_data = function(data, params) { data <- GeomSankeysegment$setup_data(data, params) - data <- data |> - dplyr::mutate( - slope = params$slope, - ncp = params$ncp + unique_x <- unique(data$x) |> sort() + curve_params <- + data.frame( + x = unique_x, + slope = rep(params$slope, length.out = length(unique_x)), + curve_weight = rep(params$curve_weight, length.out = length(unique_x)), + ncp = params$ncp ) + data <- data |> + dplyr::left_join(curve_params, by = "x") return(data) }, rename_size = FALSE, default_aes = c(GeomSankeysegment$default_aes, waist = 1), draw_key = draw_key_sankeyedge, - extra_params = c("na.rm", "slope", "ncp") + extra_params = c("na.rm", "slope", "curve_weight", "ncp") ) #' @name geom_sankeyedge @@ -68,7 +77,7 @@ GeomSankeyedge <- geom_sankeyedge <- function(mapping = NULL, data = NULL, stat = "sankeyedge", position = "sankey", na.rm = FALSE, show.legend = NA, - slope = 0.5, ncp = 100, + slope = 0.5, curve_weight = 0.5, ncp = 100, 
width = "auto", align = c("bottom", "top", "center", "justify"), order = c("ascending", "descending", "ascending+", "descending+", "as_is"), h_space = "auto", v_space = 0, @@ -82,6 +91,7 @@ geom_sankeyedge <- ggplot2::layer( geom = GeomSankeyedge, mapping = mapping, data = data, stat = stat, position = position, show.legend = show.legend, inherit.aes = inherit.aes, - params = list(na.rm = na.rm, slope = slope, ncp = ncp, ...) + params = list(na.rm = na.rm, slope = slope, curve_weight = curve_weight, + ncp = ncp, ...) ) } diff --git a/R/stat_edge.r b/R/stat_edge.r index 6109519..62bc81a 100644 --- a/R/stat_edge.r +++ b/R/stat_edge.r @@ -28,12 +28,13 @@ StatSankeyedge <- #' @export stat_sankeyedge <- function(mapping = NULL, data = NULL, geom = "sankeyedge", - position = "sankey", na.rm = FALSE, slope = 0.5, ncp = 100, - show.legend = NA, inherit.aes = TRUE, ...) { + position = "sankey", na.rm = FALSE, slope = 0.5, curve_weight = 0.5, + ncp = 100, show.legend = NA, inherit.aes = TRUE, ...) { ggplot2::layer( stat = StatSankeyedge, data = data, mapping = mapping, geom = geom, position = position, show.legend = show.legend, inherit.aes = inherit.aes, - params = list(na.rm = na.rm, slope = slope, ncp = ncp, ...) + params = list(na.rm = na.rm, slope = slope, curve_weight = curve_weight, + ncp = ncp, ...) ) } diff --git a/README.Rmd b/README.Rmd index 3e1b786..c9541ca 100644 --- a/README.Rmd +++ b/README.Rmd @@ -35,7 +35,7 @@ The `ggsankeyfier` packages allows you to visualise your data as Sankey or Alluv A Sankey diagram is essentially a stacked bar plot, where the bands connect bars across stages (on the x-axis), to show how quantities flow between them. -## Why use `ggsankeyfier`? +## Why Use `ggsankeyfier`? `ggsankeyfier` allows you to add Sankey diagram layers to a `ggplot2::ggplot()`. 
The package also provides `stat_*` and `position_*` functions that allow you @@ -56,7 +56,7 @@ install.packages("ggsankeyfier") devtools::install_github('pepijn-devries/ggsankeyfier') ``` -## Important concepts +## Important Concepts As there is some variation in the definition and terminology used in Sankey diagrams, there are some introduced here for consistency across the package documentation. @@ -127,7 +127,7 @@ to `group`. In addition to these 'standard' aesthetics, you also need to specify `connector` specifying the direction of an edge (one of `'from'` or `'to'`); and an `edge_id` which is used to determine which connector ends should be paired together. -### Data management +### Data Management Note that the plotting routines require data organised in a `data.frame`, with in each row a 'connector'. A connector is either the start or an end of an edge. This allows @@ -136,7 +136,7 @@ cases this is not the type of data you will be working with. Check `vignette("data_management")`, on how to rearrange your data for displaying it in a Sankey diagram. -### Positioning nodes and edges +### Positioning Nodes and Edges The package gives you much control on the positioning of elements in the diagram. Think of: @@ -147,7 +147,7 @@ The package gives you much control on the positioning of elements in the diagram `vignette("positioning")` and `vignette("stacking_order")` will show you how. -### Decorating nodes and edges +### Decorating Nodes and Edges When creating your own Sankey diagrams you may want to alter its appearance. You may want to: diff --git a/README.md b/README.md index b1e8769..2c02cf7 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ or Alluvial diagrams. A Sankey diagram is essentially a stacked bar plot, where the bands connect bars across stages (on the x-axis), to show how quantities flow between them. -## Why use `ggsankeyfier`? +## Why Use `ggsankeyfier`? `ggsankeyfier` allows you to add Sankey diagram layers to a `ggplot2::ggplot()`. 
The package also provides `stat_*` and `position_*` @@ -46,7 +46,7 @@ install.packages("ggsankeyfier") devtools::install_github('pepijn-devries/ggsankeyfier') ``` -## Important concepts +## Important Concepts As there is some variation in the definition and terminology used in Sankey diagrams, there are some introduced here for consistency across @@ -133,7 +133,7 @@ addition to these ‘standard’ aesthetics, you also need to specify a `'to'`); and an `edge_id` which is used to determine which connector ends should be paired together. -### Data management +### Data Management Note that the plotting routines require data organised in a `data.frame`, with in each row a ‘connector’. A connector is either the @@ -143,7 +143,7 @@ not the type of data you will be working with. Check `vignette("data_management")`, on how to rearrange your data for displaying it in a Sankey diagram. -### Positioning nodes and edges +### Positioning Nodes and Edges The package gives you much control on the positioning of elements in the diagram. Think of: @@ -156,7 +156,7 @@ diagram. Think of: `vignette("positioning")` and `vignette("stacking_order")` will show you how. -### Decorating nodes and edges +### Decorating Nodes and Edges When creating your own Sankey diagrams you may want to alter its appearance. You may want to: @@ -188,7 +188,7 @@ from the Dutch Ministry of Agriculture, Nature and Food Quality ## Resources -- Piet GJ, Jongbloed RH, Bentley JW, Grundlehner A, Tamis JE, De Vries +- Piet GJ, Bentley JW, Jongbloed RH, Grundlehner A, Tamis JE, De Vries P (2024) A Cumulative Impact Assessment on the North Sea Capacity to Supply Ecosystem Services. 
Science of The Total Environment (498) [DOI:10.1016/j.scitotenv.2024.174149](https://doi.org/10.1016/j.scitotenv.2024.174149) diff --git a/man/geom_sankeyedge.Rd b/man/geom_sankeyedge.Rd index b6fe870..9bfe4d6 100644 --- a/man/geom_sankeyedge.Rd +++ b/man/geom_sankeyedge.Rd @@ -46,6 +46,7 @@ geom_sankeyedge( na.rm = FALSE, show.legend = NA, slope = 0.5, + curve_weight = 0.5, ncp = 100, width = "auto", align = c("bottom", "top", "center", "justify"), @@ -198,6 +199,11 @@ lists which parameters it can accept. \item{slope}{Slope parameter (\code{numeric}) for the Bezier curves used to depict the edges. Any value between 0 and 1 will work nicely. Other non-zero values will also work.} +\item{curve_weight}{Places weight on the Bezier curve. Values close to zero will +pull the inflection point of the curve towards outgoing nodes. Values close to one +will pull them towards incoming nodes. The default is 0.5, which will place the +inflection point exactly in the middle of the connecting nodes.} + \item{ncp}{Number of control points on the Bezier curve that forms the edge. Larger numbers will result in smoother curves, but cost more computational time. Default is 100.} diff --git a/man/stat_sankey.Rd b/man/stat_sankey.Rd index d3f0d81..08ab770 100644 --- a/man/stat_sankey.Rd +++ b/man/stat_sankey.Rd @@ -22,6 +22,7 @@ stat_sankeyedge( position = "sankey", na.rm = FALSE, slope = 0.5, + curve_weight = 0.5, ncp = 100, show.legend = NA, inherit.aes = TRUE, @@ -74,6 +75,11 @@ a warning. If \code{TRUE}, missing values are silently removed.} \item{slope}{Slope parameter (\code{numeric}) for the Bezier curves used to depict the edges. Any value between 0 and 1 will work nicely. Other non-zero values will also work.} +\item{curve_weight}{Places weight on the Bezier curve. Values close to zero will +pull the inflection point of the curve towards outgoing nodes. Values close to one +will pull them towards incoming nodes. 
The default is 0.5, which will place the +inflection point exactly in the middle of the connecting nodes.} + \item{ncp}{Number of control points on the Bezier curve that forms the edge. Larger numbers will result in smoother curves, but cost more computational time. Default is 100.} diff --git a/vignettes/data_management.Rmd b/vignettes/data_management.Rmd index 1393b5e..36a52ef 100644 --- a/vignettes/data_management.Rmd +++ b/vignettes/data_management.Rmd @@ -1,8 +1,8 @@ --- -title: "Sankey data management" +title: "Sankey Data Management" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Sankey data management} + %\VignetteIndexEntry{Sankey Data Management} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -26,7 +26,7 @@ probably organised in a wide format with stages of the Sankey diagram in columns For plotting this needs to be converted into a long format. Why and how to do this, is discussed below. -## Wide or long format? +## Wide or Long Format? A wide format would be typically used when working with the data. This can be best understood when the framework you wish to visualise represents a @@ -45,7 +45,7 @@ Now, when do we work with either the wide or the long format? When working with on chains, it makes sense to work with a wide format. When plotting with `ggsankeyfier` or modification of flow information is required, a long format is more suitable. -### Converting from wide to long +### Converting from Wide to Long This package comes with a function that allow you to pivot information with stages organised as columns (i.e., wide format) to a long format. All you need to do is specify which columns represent @@ -79,7 +79,7 @@ es_long <- ) ``` -## The edge id and connector +## The Edge id and Connector After pivoting to the long format as illustrated above you will note two additional columns that contain information that was not available in the wide format. 
Namely the columns `edge_id` diff --git a/vignettes/decorating.Rmd b/vignettes/decorating.Rmd index 58bbc1f..bec08bc 100644 --- a/vignettes/decorating.Rmd +++ b/vignettes/decorating.Rmd @@ -1,8 +1,8 @@ --- -title: "Decorating Sankey diagrams" +title: "Decorating Sankey Diagrams" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Decorating Sankey diagrams} + %\VignetteIndexEntry{Decorating Sankey Diagrams} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -16,7 +16,7 @@ knitr::opts_chunk$set( ) ``` -## Decorating nodes and edges with aesthetics +## Decorating Nodes and Edges with Aesthetics With the `waist` aesthetic you can modify the shape of the edge curve, this is presented in more detail in the [Curve shape] section. Here it is shown how Nodes and edges can @@ -54,7 +54,7 @@ Note that both nodes and edges can be decorated separately. Also note that each variable that is assigned to one or more aesthetics will get its own guide legend. More about this in the section [Keys and legends]. -## Additional layers +## Additional Layers When you want to add additional layers to your plot (such as text labels), it is important that those layers apply the same statistics and positioning as the `geom_sankeyedge` or @@ -95,7 +95,7 @@ p + geom_bar(position = pos, stat = "sankeynode") ``` -## Curve shape +## Curve Shape The curves that connect the nodes in `ggsankeyfier` are drawn as symmetrical widened [Bézier](https://en.wikipedia.org/wiki/B%C3%A9zier_curve) curves. The slope of @@ -111,6 +111,19 @@ p + You could even go beyond the `slope` of 1, but then the curve will start to zigzag. With values less than 1 will result in gentler slopes. +You can also position the inflection point of the curve. By default it's exactly +at the center in between the two connecting nodes. By setting the `curve_weight` +parameter to values close to zero, the inflection point will move to the left. +Values close to one will make it move to the right. 
You can set this parameter +for each individual stage (the same is true for the `slope` parameter). + +```{r curve_weight, fig.width=6, fig.height=3} +p + + geom_sankeyedge(curve_weight = c(0.1, 0.9), position = pos, + mapping = aes(fill = service_section)) + + geom_sankeynode(position = pos) +``` + We can also play with how much the curve is widened. By default the width of the curve is constant along the Bézier curve it follows. By setting the `waist` aesthetic, the width of the curve is blown up, or shrunk down at its center. There are also `scale_waist_*` functions @@ -123,7 +136,7 @@ p + geom_sankeynode(position = pos) ``` -## Keys and legends +## Keys and Legends Both nodes and edges have their own `draw_key()` function, meaning that they are drawn automatically by `ggplot2::guide_legend()`. When multiple aesthetics @@ -139,7 +152,7 @@ p + scale_fill_binned(guide = "legend") ``` -## Different themes +## Different Themes At the top of this vignette we set `ggplot2::theme_light()` as the default theme. It is also possible to add themes directly to the plot: diff --git a/vignettes/loopdeloop.Rmd b/vignettes/loopdeloop.Rmd index d5ca99c..242db90 100644 --- a/vignettes/loopdeloop.Rmd +++ b/vignettes/loopdeloop.Rmd @@ -16,7 +16,7 @@ knitr::opts_chunk$set( ) ``` -## To cycle or not to cycle +## To Cycle or not to Cycle The `ggsankeyfier` package requires you to specify from which node, to which node an edge flows. See `vignette("data_management")` for more technical details on how this works. Consequently, this allows you to let edges flow to any node in any stage. In most cases, Sankey or alluvial @@ -73,7 +73,7 @@ es_subset_feedback <- p %+% es_subset_feedback ``` -## Self reference +## Self Reference Edges don't even have to flow from one stage to another regardless of its direction. Instead, it is also possible to let an edge flow from and to the same stage. 
In fact, you diff --git a/vignettes/positioning.Rmd b/vignettes/positioning.Rmd index 11a5711..5a20280 100644 --- a/vignettes/positioning.Rmd +++ b/vignettes/positioning.Rmd @@ -1,8 +1,8 @@ --- -title: "Positioning Sankey elements" +title: "Positioning Sankey Elements" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Positioning Sankey elements} + %\VignetteIndexEntry{Positioning Sankey Elements} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} ---