Skip to content

Commit a402a37

Browse files
committed
Add possibility to rescale scores on ranks
Rescaling scores on ranks produces more stable rankings, less prone to fluctuations when adding or removing integrations. Parameter `rescale` of `ScaleScores()` and `PlotScores()` is not a boolean anymore. It now accepts "rank" (default for `PlotScores()`). Also, docs better explain rescaling.
1 parent 29fdc9e commit a402a37

File tree

4 files changed

+143
-20
lines changed

4 files changed

+143
-20
lines changed

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# SeuratIntegrate (development version)
22

3+
* Revised score rescaling with a new option enabling min-max rescaling of ranks
4+
(default) rather than scores directly (as in
5+
[Luecken *et al.*, 2021](https://doi.org/10.1038/s41592-021-01336-8))
6+
37
* Speed up the Dijkstra-like algorithm used in `ExpandNeighbours` with a new C++
48
implementation
59

R/scores.R

Lines changed: 90 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,23 @@ SetMiscScore <- function(object, integration, score.name, score.value, ...) {
196196
return(object)
197197
}
198198

199+
200+
#' Compute ranks of a vector and rescale between 0 and 1
201+
#' @description
202+
#' Compute ranks of a vector with \code{NA}s kept in their original positions.
203+
#' The ranks are then adjusted by adding half the number of \code{NA} values.
204+
#' Those adjusted ranks are then normalised by the length of the vector, such
205+
#' that resulting values are bounded between 0 and 1 when there is no \code{NA}.
206+
#' The range of the output values shrinks as the number of \code{NA}s increases.
207+
#'
208+
#' @param x a numerical vector
209+
#' @keywords internal
210+
#' @noRd
211+
adj_rank_scaled <- function(x) {
212+
adj_rank <- rank(x, na.last = 'keep') + sum(is.na(x)) / 2
213+
return(adj_rank / length(x))
214+
}
215+
199216
#' @keywords internal
200217
#' @noRd
201218
get.score.types <- function(col_names, batch = FALSE) {
@@ -279,26 +296,43 @@ compute.overall.scores <- function(scaled.scores, batch.scores, bio.scores,
279296
#' @param ref the name of the integration to use as a reference for scaling.
280297
#' Useful for PCA regression (and density) and cell cycle conservation scores.
281298
#' @param rescale whether to rescale each score between 0 and 1 using min-max
282-
#' normalisation before computing overall scores. This ensures that each metric
283-
#' equally contributes to the overall scores. \code{TRUE by default}
299+
#' normalisation before computing overall scores. One of 'none' (default, no
300+
#' rescaling), 'rank' or 'score', which apply min-max normalisation to
301+
#' ranks or scores respectively. Beware that the argument of
302+
#' \code{\link{PlotScores}()} \strong{overrides the choice here} (see
303+
#' \strong{Details} sections).
284304
#' @param batch.coeff the weight of batch correction performance evaluation
285305
#' scores in the overall score.
286306
#' @param bio.coeff the weight of bio-conservation performance evaluation scores
287307
#' in the overall score.
288308
#'
309+
#' @details
310+
#' It is recommended to keep the default parameter \code{rescale = "none"} to
311+
#' retain the possibility to plot scores without min-max rescaling later with
312+
#' \code{\link{PlotScores}()}.
313+
#'
314+
#' @seealso \code{\link{PlotScores}()} for further details on rescaling
315+
#'
289316
#' @importFrom SeuratObject Misc
290317
#' @importFrom dplyr %>% select arrange filter summarise across mutate bind_rows rowwise c_across ungroup case_when
291318
#' @importFrom purrr map2 reduce
292319
#' @importFrom rlang sym syms data_syms !! !!!
293320
#' @importFrom scales rescale
294321
#' @export
295-
ScaleScores <- function(object, ref = "Unintegrated", rescale = FALSE,
322+
ScaleScores <- function(object, ref = "Unintegrated",
323+
rescale = c("none", "rank", "score"),
296324
batch.coeff = .4, bio.coeff = .6) {
297325
ref <- ref %||% "Unintegrated"
298326
Misc(object, slot = 'si_scores') %||%
299327
abort("No scores. Please compute scores before.")
300328
raw.scores <- Misc(object, slot = 'si_scores') %>%
301329
select(Integration, !Integration) # put Integration column 1st
330+
rescale <- rescale %||% "none"
331+
if (is.logical(rescale)) {
332+
rescale <- c("none", "rank")[rescale + 1]
333+
}
334+
rescale <- tolower(rescale)
335+
rescale <- match.arg(rescale)
302336

303337
# ensure sum of coefficients is 1 (to keep overall scores between 0 and 1)
304338
sum.coeff <- batch.coeff + bio.coeff
@@ -400,9 +434,15 @@ ScaleScores <- function(object, ref = "Unintegrated", rescale = FALSE,
400434
purrr::map2(colnames(.), ~ scaling[[.y]](.x, raw.scores$Integration)) %>%
401435
bind_rows()
402436

403-
if (rescale) {
437+
if (rescale != "none") {
438+
warning(sprintf("only %s enables plotting non-rescaled scores with `%s`",
439+
sQuote("rescale = 'none'"), "PlotScores()"),
440+
call. = TRUE, immediate. = TRUE)
404441
scaled.scores <- scaled.scores %>%
405-
mutate(across(!Integration, ~ rescale(x = .x, to = c(0,1))) )
442+
mutate(across(!Integration, ~ switch(rescale,
443+
"rank" = adj_rank_scaled(x = .x),
444+
"score" = rescale(x = .x, to = c(0,1))
445+
)) )
406446
}
407447

408448
bio.scores <- get.score.types(colnames(scaled.scores), batch = FALSE)
@@ -452,12 +492,15 @@ IntegrationScores <- function(object, scaled = FALSE) {
452492
#' @param exclude.score name of the score(s) to exclude. The default value
453493
#' (\code{NULL}) includes them all.
454494
#' @param recompute.overall.scores whether to recompute overall scores. Useful
455-
#' when there is a restriction on scores to plot. When \code{FALSE},
495+
#' when some scores or integrations are excluded. When \code{FALSE},
456496
#' coefficient parameters have no impact.
457497
#' @param rescale whether to rescale each score between 0 and 1 using min-max
458-
#' normalisation before computing overall scores. This ensures that each metric
459-
#' equally contributes to the overall scores. Has no effect when
460-
#' \code{recompute.overall.scores = FALSE}. \code{TRUE by default}
498+
#' normalisation before computing overall scores. One of 'rank' (default),
499+
#' 'score' or 'none'. The first two enable rescaling on ranks and scores
500+
#' respectively, while 'none' disables rescaling. Rescaling ensures that each
501+
#' metric equally contributes to the overall scores. 'rank' is more stable to
502+
#' changes in integrations included. Has no effect when
503+
#' \code{recompute.overall.scores = FALSE}.
461504
#' @param point.max.size inoperative unless \code{plot.type = 'table'} and
462505
#' \code{use.ggforce = FALSE}. Determine the maximum size of the points
463506
#' (only achieved for a score of 1) to fit the plotting area (handled
@@ -467,6 +510,32 @@ IntegrationScores <- function(object, scaled = FALSE) {
467510
#' installed
468511
#'
469512
#' @return a ggplot object
513+
#'
514+
#' @details
515+
#' The \code{rescale} parameter controls how scores are rescaled prior to
516+
#' computing overall scores. Thus, when \code{rescale != 'none'},
517+
#' \code{recompute.overall.scores} should be \code{TRUE} (default). Otherwise,
518+
#' the overall scores will no longer match the scores displayed on the plot.
519+
#'
520+
#' Although the min-max rescaling of scores in Luecken M.D. \emph{et al.}, 2021 is
521+
#' achievable with \code{rescale = 'score'}, min-max rescaling on ranks
522+
#' (\code{rescale = 'rank'}) is the default. Indeed, it results in more stable
523+
#' rankings of overall scores when the list of integrations to plot is
524+
#' modified.
525+
#'
526+
#' To plot non-rescaled scores, make sure to use
527+
#' \code{\link{ScaleScores}(..., rescale = "none")} beforehand. To plot rescaled
528+
#' scores however, the \code{rescale} parameter of \code{\link{ScaleScores}} has
529+
#' no impact on the scores obtained for plotting.
530+
#'
531+
#' @seealso \code{\link{ScaleScores}()}
532+
#'
533+
#' @references Luecken, M. D., Büttner, M., Chaichoompu, K., Danese, A.,
534+
#' Interlandi, M., Mueller, M. F., Strobl, D. C., Zappia, L., Dugas, M.,
535+
#' Colomé-Tatché, M. & Theis, F. J. Benchmarking atlas-level data integration in
536+
#' single-cell genomics. Nat Methods 19, 41–50 (2021).
537+
#' \href{https://doi.org/10.1038/s41592-021-01336-8}{DOI}
538+
#'
470539
#' @importFrom rlang is_installed !!
471540
#' @importFrom SeuratObject Misc
472541
#' @importFrom dplyr %>% mutate across where case_when desc select filter
@@ -484,7 +553,7 @@ PlotScores <- function(object, plot.type = c('dot', 'radar', 'lollipop'),
484553
include.score = NULL,
485554
exclude.score = NULL,
486555
recompute.overall.scores = TRUE,
487-
rescale = TRUE,
556+
rescale = c("rank", "score", "none"),
488557
batch.coeff = .4, bio.coeff = .6,
489558
point.max.size = 20L,
490559
use.ggforce = is_installed('ggforce')) {
@@ -493,6 +562,12 @@ PlotScores <- function(object, plot.type = c('dot', 'radar', 'lollipop'),
493562
plot.type <- match.arg(plot.type)
494563
order.by <- tolower(order.by %||% 'asis')
495564
order.by <- match.arg(order.by)
565+
rescale <- rescale %||% "none"
566+
if (is.logical(rescale)) {
567+
rescale <- c("none", "rank")[rescale + 1]
568+
}
569+
rescale <- tolower(rescale)
570+
rescale <- match.arg(rescale)
496571

497572
scaled.scores <- Misc(object, slot = 'si_scaled.scores')
498573

@@ -521,9 +596,12 @@ PlotScores <- function(object, plot.type = c('dot', 'radar', 'lollipop'),
521596
'Consider less harsh exclusion or broader inclusion criteria'))
522597
}
523598

524-
if (rescale) {
599+
if (rescale != 'none') {
525600
scaled.scores <- scaled.scores %>%
526-
mutate(across(!Integration, ~ rescale(x = .x, to = c(0,1))) )
601+
mutate(across(!Integration, ~ switch(rescale,
602+
"rank" = adj_rank_scaled(x = .x),
603+
"score" = rescale(x = .x, to = c(0,1))
604+
)) )
527605
}
528606

529607
bio.scores <- get.score.types(colnames(scaled.scores), batch = FALSE)

man/PlotScores.Rd

Lines changed: 35 additions & 5 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/ScaleScores.Rd

Lines changed: 14 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)