Skip to content

Commit b90abac

Browse files
authored
Add one-way ANOVA function implementation (#195)
1 parent 7a5ea9c commit b90abac

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#' One-way ANOVA (from scratch)
2+
#'
3+
#' @description
4+
#' Computes a one-way Analysis of Variance (ANOVA) to test whether k group means
5+
#' are equal. Implements sums of squares, F statistic, and p-value without using \code{aov()}.
6+
#'
7+
#' @param x Numeric vector of observations.
8+
#' @param g Factor or character vector of group labels (same length as \code{x}).
9+
#'
10+
#' @return A list with:
11+
#' \item{df_between}{Degrees of freedom between groups (k - 1).}
12+
#' \item{df_within}{Degrees of freedom within groups (n - k).}
13+
#' \item{ss_between}{Between-group sum of squares.}
14+
#' \item{ss_within}{Within-group sum of squares.}
15+
#' \item{ms_between}{Between-group mean square.}
16+
#' \item{ms_within}{Within-group mean square.}
17+
#' \item{F}{F-statistic.}
18+
#' \item{p_value}{Right-tail p-value from F distribution.}
19+
#'
20+
#' @details
21+
#' One-way ANOVA partitions total variance into between-group and within-group components:
22+
#' \deqn{SS_T = SS_B + SS_W.}
23+
#' The test statistic \eqn{F = MS_B / MS_W} follows an F distribution under H0 (equal means).
24+
#'
25+
#' @examples
26+
#' set.seed(0)
27+
#' x <- c(rnorm(20, 0, 1), rnorm(22, 0.2, 1), rnorm(18, -0.1, 1))
28+
#' g <- factor(rep(c("A", "B", "C"), times = c(20, 22, 18)))
29+
#' res <- anova_oneway(x, g)
30+
#' res$F; res$p_value
31+
#'
32+
#' @export
33+
anova_oneway <- function(x, g) {
34+
if (!is.numeric(x)) stop("`x` must be numeric.")
35+
if (length(x) != length(g)) stop("`x` and `g` must have the same length.")
36+
if (!is.factor(g)) g <- factor(g)
37+
38+
n <- length(x)
39+
k <- nlevels(g)
40+
if (k < 2L) stop("Need at least 2 groups.")
41+
if (n <= k) stop("Total observations must exceed number of groups.")
42+
43+
grand_mean <- mean(x)
44+
45+
# group stats
46+
group_means <- tapply(x, g, mean)
47+
group_sizes <- tapply(x, g, length)
48+
49+
# SS_between = sum_{groups} n_j * (mean_j - grand_mean)^2
50+
ss_between <- sum(group_sizes * (group_means - grand_mean)^2)
51+
52+
# SS_within = sum over groups of sum (x_ij - mean_j)^2
53+
ss_within <- sum(unlist(tapply(x, g, function(v) sum((v - mean(v))^2))))
54+
55+
df_between <- k - 1L
56+
df_within <- n - k
57+
58+
ms_between <- ss_between / df_between
59+
ms_within <- ss_within / df_within
60+
61+
F_stat <- ms_between / ms_within
62+
p_val <- stats::pf(F_stat, df_between, df_within, lower.tail = FALSE)
63+
64+
list(
65+
df_between = df_between,
66+
df_within = df_within,
67+
ss_between = ss_between,
68+
ss_within = ss_within,
69+
ms_between = ms_between,
70+
ms_within = ms_within,
71+
F = F_stat,
72+
p_value = p_val
73+
)
74+
}
75+
76+
# --- Simple self-test (uncomment to run locally) ---
77+
# set.seed(1)
78+
# x <- c(rnorm(15, 0, 1), rnorm(15, 0.5, 1), rnorm(15, 1, 1))
79+
# g <- factor(rep(c("A","B","C"), each = 15))
80+
# res <- anova_oneway(x, g)
81+
# stopifnot(res$F > 0, res$p_value >= 0, res$p_value <= 1)

0 commit comments

Comments
 (0)