epiforecasts
diff --git a/‎R/metrics-multivariate-point.R‎
Lines changed: 17 additions & 28 deletions b/‎R/metrics-multivariate-point.R‎
Lines changed: 17 additions & 28 deletions
diff --git a/‎R/metrics-multivariate-sample.R‎
Lines changed: 44 additions & 15 deletions b/‎R/metrics-multivariate-sample.R‎
Lines changed: 44 additions & 15 deletions
diff --git a/‎man/energy_score_multivariate.Rd‎
Lines changed: 23 additions & 5 deletions b/‎man/energy_score_multivariate.Rd‎
Lines changed: 23 additions & 5 deletions
diff --git a/‎man/variogram_score_multivariate.Rd‎
Lines changed: 27 additions & 11 deletions b/‎man/variogram_score_multivariate.Rd‎
Lines changed: 27 additions & 11 deletions
diff --git a/‎man/variogram_score_multivariate_point.Rd‎
Lines changed: 32 additions & 11 deletions b/‎man/variogram_score_multivariate_point.Rd‎
Lines changed: 32 additions & 11 deletions
diff --git a/‎vignettes/scoring-multivariate-forecasts.Rmd‎
Lines changed: 37 additions & 59 deletions b/‎vignettes/scoring-multivariate-forecasts.Rmd‎
Lines changed: 37 additions & 59 deletions
@@ -1,21 +1,16 @@
 #' @title Variogram score for multivariate point forecasts
 #' @description
-#' Compute the variogram score
-#' (see [scoringRules::vs_sample()])
-#' for each group defined by `mv_group_id`, treating each point
-#' forecast as a single-sample ensemble.
-#' @param observed Numeric vector of observed values.
+#' Compute the variogram score for multivariate point forecasts,
+#' treating each point forecast as a single-sample ensemble.
+#' This is a thin wrapper around
+#' [variogram_score_multivariate()] with `w = NULL`.
+#'
+#' See [variogram_score_multivariate()] for details on the
+#' variogram score and its parameters.
+#' @inheritParams variogram_score_multivariate
+#' @inherit variogram_score_multivariate return references
 #' @param predicted Numeric matrix with one column, where each row
 #'   corresponds to a target within a multivariate group.
-#' @param mv_group_id Numeric vector of length equal to
-#'   `length(observed)` with group identifiers.
-#' @param w_vs Numeric matrix of weights for the variogram score.
-#'   See [scoringRules::vs_sample()] for details.
-#' @param p Numeric, order of the variogram score.
-#'   Defaults to 0.5. See [scoringRules::vs_sample()] for details.
-#' @return A named numeric vector of scores, one per multivariate
-#'   group. Lower values are better.
-#' @importFrom scoringRules vs_sample
 #' @importFrom checkmate assert_numeric
 #' @export
 #' @keywords metric
@@ -27,19 +22,13 @@ variogram_score_multivariate_point <- function(
   assert_numeric(observed, min.len = 1)
   assert_numeric(as.vector(predicted), min.len = 1)
   assert_numeric(mv_group_id, len = length(observed))
-  unique_groups <- unique(mv_group_id)
-
-  vs <- vapply(unique_groups, function(group) {
-    idx <- which(mv_group_id == group)
-    scoringRules::vs_sample(
-      y = observed[idx],
-      dat = predicted[idx, , drop = FALSE],
-      w_vs = w_vs,
-      p = p
-    )
-  }, numeric(1))
-
-  names(vs) <- unique_groups
-  return(vs)
+  variogram_score_multivariate(
+    observed = observed,
+    predicted = predicted,
+    mv_group_id = mv_group_id,
+    w = NULL,
+    w_vs = w_vs,
+    p = p
+  )
 }
 # nolint end
@@ -29,13 +29,28 @@ assert_input_multivariate_sample <- function(observed, predicted, mv_group_id) {
 
 #' @title Energy score for multivariate forecasts
 #' @description
-#' Compute the multivariate energy score
-#' (see \link[scoringRules:es_sample]{scoringRules::es_sample})
-#' for each group defined by `mv_group_id`.
+#' Compute the energy score (Gneiting et al., 2008) for each
+#' multivariate group defined by `mv_group_id`. The energy
+#' score is a multivariate generalisation of the CRPS that
+#' measures both calibration and sharpness of the forecast
+#' distribution.
+#'
+#' The score is computed using
+#' [scoringRules::es_sample()].
 #' @inheritParams ae_median_sample
 #' @inheritParams assert_input_multivariate_sample
-#' @inherit scoringRules::es_sample params
-#' @keywords internal_input_check
+#' @param w Optional numeric vector of weights for forecast samples
+#'   (length equal to the number of columns of `predicted`).
+#'   If `NULL` (the default), equal weights are used.
+#' @return A named numeric vector of scores, one per multivariate
+#'   group. Lower values are better.
+#' @references
+#' Gneiting, T., Stanberry, L.I., Grimit, E.P., Held, L. and
+#' Johnson, N.A. (2008). Assessing probabilistic forecasts of
+#' multivariate quantities, with an application to ensemble
+#' predictions of surface winds.
+#' *TEST*, 17, 211-235.
+#' @keywords metric
 #' @export
 energy_score_multivariate <- function(observed, predicted, mv_group_id, w = NULL) {
   assert_input_multivariate_sample(observed, predicted, mv_group_id)
@@ -54,21 +69,35 @@ energy_score_multivariate <- function(observed, predicted, mv_group_id, w = NULL
 #' Variogram score for multivariate forecasts
 #'
 #' @description
-#' Compute the variogram score for multivariate forecasts.
-#' The variogram score (Scheuerer and Hamill, 2015) evaluates the
-#' dependence structure of multivariate forecasts by comparing
-#' predicted pairwise differences against observed pairwise
-#' differences.
+#' Compute the variogram score for each multivariate group
+#' defined by `mv_group_id`.
+#' The variogram score (Scheuerer and Hamill, 2015) assesses
+#' whether a forecast captures the correlation structure across
+#' the targets being forecast jointly (e.g. locations, age
+#' groups). For each pair of targets (i, j), it compares the
+#' observed |y_i - y_j|^p against the expectation of |X_i - X_j|^p
+#' for a draw X from the forecast distribution.
+#' A forecast that misspecifies correlations between targets
+#' will predict pairwise differences that do not match the
+#' observations, resulting in a higher score.
 #'
 #' The score is computed using
 #' [scoringRules::vs_sample()].
 #'
 #' @inheritParams energy_score_multivariate
-#' @param w_vs Optional non-negative weight matrix. If not `NULL`,
-#'   must be a square matrix with dimensions equal to the number
-#'   of targets within each multivariate group.
-#' @param p Numeric, order of the variogram score.
-#'   Typical choices are 0.5 (default, more robust) and 1.
+#' @param w_vs Optional non-negative weight matrix for the
+#'   pairwise comparisons between targets. Entry `w_vs[i, j]`
+#'   controls the importance of the pair (i, j) in the score.
+#'   Must be a symmetric square matrix whose number of rows and
+#'   columns equals the number of targets within each
+#'   multivariate group.
+#'   If `NULL` (the default), all pairs are weighted equally.
+#' @param p Numeric, order of the variogram score. This controls
+#'   how pairwise differences are scaled: the score compares
+#'   |y_i - y_j|^p across targets. Lower values of `p` give
+#'   less weight to large differences, making the score more
+#'   robust to outliers. Typical choices are 0.5 (the default)
+#'   and 1.
 #' @return A named numeric vector of scores, one per multivariate
 #'   group. Lower values are better.
 #' @references
 
@@ -83,71 +83,49 @@ The column `.mv_group_id` is created automatically and represents an identifier
 score(example_multiv)
 ```
 
-If, at any point, you want to score the same forecast using different groupings, you'd have create a new separate forecast object with a different grouping and score that new forecast object.
-
-
-## Univariate and multivariate scoring for matrices
-
-Note: this section may only be relevant to you if you're planning to score forecasts in matrix format.
-
-Let's construct a simple multivariate forecast:
-
-```{r}
-# parameters for multivariate normal example
-set.seed(123)
-d <- 10 # number of dimensions
-m <- 50 # number of samples from multivariate forecast distribution
-
-mu0 <- rep(0, d)
-mu <- rep(1, d)
-
-S0 <- S <- diag(d)
-S0[S0 == 0] <- 0.2
-S[S == 0] <- 0.1
-
-# generate samples from multivariate normal distributions
-obs <- drop(mu0 + rnorm(d) %*% chol(S0))
-fc_sample <- replicate(m, drop(mu + rnorm(d) %*% chol(S)))
-
-obs2 <- drop(mu0 + rnorm(d) %*% chol(S0))
-fc_sample2 <- replicate(m, drop(mu + rnorm(d) %*% chol(S)))
-```
-
-Now, we can compute the Energy Score. Let's compare the `scoringutils` implementation with that of the `scoringRules` package, on which the `scoringutils` implementation is based. The only difference is that `scoringRules` always expects a single multivariate `forecast`, while the `scoringutils` implementation can handle multiple multivariate forecasts together, identified via a grouping vector (assuming they all have the same dimension).
+By default, `score()` computes both the energy score and the variogram score for multivariate sample forecasts.
+The energy score is a multivariate generalisation of the CRPS that measures overall forecast accuracy.
+The variogram score (Scheuerer and Hamill, 2015) specifically targets the correlation structure between the targets being forecast jointly.
+For each pair of targets (e.g. two countries), it compares the observed absolute difference |y_i - y_j|^p against what the forecast distribution predicts for that difference.
+A forecast that gets the correlations between targets wrong will predict pairwise differences that do not match the observations, producing a higher score.
+This makes the variogram score more sensitive to misspecified correlations than the energy score.
+
+You can customise parameters using `purrr::partial()`.
+The order parameter `p` controls how differences are scaled: `p = 0.5` (the default) is more robust to outliers, while `p = 1` gives a standard absolute difference.
+See `?variogram_score_multivariate` for full parameter documentation.
+For example, to use `p = 1`:
 
 ```{r}
-scoringRules::es_sample(y = obs, dat = fc_sample)
-# in the univariate case, Energy Score and CRPS are the same
-# illustration: Evaluate forecast sample for the first variable
-es_sr1 <- scoringRules::es_sample(y = obs, dat = fc_sample)
-es_sr2 <- scoringRules::es_sample(y = obs2, dat = fc_sample2)
-es_sr <- c(es_sr1, es_sr2)
-
-es_su <- energy_score_multivariate(
-  observed = c(obs, obs2),
-  predicted = rbind(fc_sample, fc_sample2),
-  mv_group_id = c(rep(1, d), rep(2, d))
+score(
+  example_multiv,
+  metrics = list(
+    energy_score = energy_score_multivariate,
+    variogram_score = purrr::partial(
+      variogram_score_multivariate, p = 1
+    )
+  )
 )
-all.equal(es_sr, es_su, tolerance = 1e-6, check.attributes = FALSE)
 ```
 
-You can provide observation weights when computing the Energy Score.
+## Multivariate point forecasts
+
+If you have point forecasts rather than samples, you can score them using the variogram score via `as_forecast_multivariate_point()`.
+This treats each point forecast as a single-sample ensemble.
 
 ```{r}
-# illustration of observation weights for Energy Score
-# example: equal weights for first half of draws; zero weights for other draws
-w <- rep(c(1, 0), each = 0.5 * m) / (0.5 * m)
-
-es_sr1 <- scoringRules::es_sample(y = obs, dat = fc_sample, w = w)
-es_sr2 <- scoringRules::es_sample(y = obs2, dat = fc_sample2, w = w)
-es_sr <- c(es_sr1, es_sr2)
-
-es_su <- energy_score_multivariate(
-  observed = c(obs, obs2),
-  predicted = rbind(fc_sample, fc_sample2),
-  mv_group_id = c(rep(1, d), rep(2, d)),
-  w = w
-)
+example_point_multi <- example_point[
+  target_type == "Cases" &
+    forecast_date == "2021-05-03" &
+    target_end_date == "2021-05-15" &
+    horizon == 2 &
+    model == "EuroCOVIDhub-ensemble"
+]
 
-all.equal(es_sr, es_su, tolerance = 1e-6, check.attributes = FALSE)
+example_mv_point <- as_forecast_multivariate_point(
+  data = na.omit(example_point_multi),
+  joint_across = c("location", "location_name")
+)
+score(example_mv_point)
 ```
+
+If, at any point, you want to score the same forecast using different groupings, you'd have to create a new separate forecast object with a different grouping and score that new forecast object.