Adjust ftest

andreasnoack · andreasnoack · commit f759de4dab80 · 2025-12-28T21:41:12.000+01:00
This commit changes the degrees of freedom shown in the table from
the number of estimated parameters (dof) to the residual degrees of
freedom (dof_residual). This makes it possible to easily calculate
the F-test statistic from the other quantities in the table with the
usual formulas.

Since nobs now returns floats, dof_residual also generally returns
floats, so the degrees of freedom are now stored as floats instead
of integers.

The commit removes the R-squared quantities. They are not needed
and I don't think they add value.

The commit also removes numbering of the rows in the output. I don't
think it is helpful.

The show method is now for MIME"text/plain" since that is more
appropriate for a "decorated" show method that spans multiple lines.
In most cases this won't be visible to users.
diff --git a/src/ftest.jl b/src/ftest.jl
@@ -5,11 +5,10 @@ struct SingleFTestResult
     pval::Float64
 end
 
-mutable struct FTestResult{N}
+struct FTestResult{N}
     nobs::Int
     ssr::NTuple{N,Float64}
-    dof::NTuple{N,Int}
-    r2::NTuple{N,Float64}
+    dof::NTuple{N,Float64}
     fstat::NTuple{N,Float64}
     pval::NTuple{N,Float64}
 end
@@ -79,10 +78,9 @@ For each sequential pair of linear models in `mod...`, perform an F-test to dete
 the one model fits significantly better than the other. Models must have been fitted
 on the same data, and be nested either in forward or backward direction.
 
-A table is returned containing consumed degrees of freedom (DOF),
-absolute difference in DOF from the preceding model, sum of squared residuals (SSR),
-absolute difference in SSR from the preceding model, R², absolute difference in R²
-from the preceding model, and F-statistic and p-value for the comparison
+A table is returned containing residual degrees of freedom (dof),
+absolute difference in dof from the preceding model, sum of squared residuals (SSR),
+absolute difference in SSR from the preceding model, and F-statistic and p-value for the comparison
 between the two models.
 
 !!! note
@@ -114,22 +112,22 @@ julia> bigmodel = lm(@formula(Result ~ 1 + Treatment + Other), dat);
 
 julia> ftest(nullmodel, model)
 F-test: 2 models fitted on 12 observations
-────────────────────────────────────────────────────────────────
-     DOF  ΔDOF     SSR    ΔSSR      R²     ΔR²        F*   p(>F)
-────────────────────────────────────────────────────────────────
-[1]    2        3.2292          0.0000
-[2]    3     1  0.1283  3.1008  0.9603  0.9603  241.6234  <1e-07
-────────────────────────────────────────────────────────────────
+───────────────────────────────────────────
+   SSR  dof    ΔSSR  Δdof        F*   p(>F)
+───────────────────────────────────────────
+3.2292   11
+0.1283   10  3.1008     1  241.6234  <1e-07
+───────────────────────────────────────────
 
 julia> ftest(nullmodel, model, bigmodel)
 F-test: 3 models fitted on 12 observations
-────────────────────────────────────────────────────────────────
-     DOF  ΔDOF     SSR    ΔSSR      R²     ΔR²        F*   p(>F)
-────────────────────────────────────────────────────────────────
-[1]    2        3.2292          0.0000
-[2]    3     1  0.1283  3.1008  0.9603  0.9603  241.6234  <1e-07
-[3]    5     2  0.1017  0.0266  0.9685  0.0082    1.0456  0.3950
-────────────────────────────────────────────────────────────────
+───────────────────────────────────────────
+   SSR  dof    ΔSSR  Δdof        F*   p(>F)
+───────────────────────────────────────────
+3.2292   11
+0.1283   10  3.1008     1  241.6234  <1e-07
+0.1017    8  0.0266     2    1.0456  0.3950
+───────────────────────────────────────────
 ```
 """
 function ftest(mods::LinearModel...; atol::Real=0.0)
@@ -156,23 +154,22 @@ function ftest(mods::LinearModel...; atol::Real=0.0)
 
     SSR = deviance.(mods)
 
-    df = dof.(mods)
+    df = dof_residual.(mods)
     Δdf = abs.(_diff(df))
-    dfr = Int.(dof_residual.(mods))
     MSR1 = _diffn(SSR) ./ Δdf
-    MSR2 = (SSR ./ dfr)
+    MSR2 = (SSR ./ df)
     if forward
         MSR2 = MSR2[2:end]
-        dfr_big = dfr[2:end]
+        dfr_big = df[2:end]
     else
         MSR2 = MSR2[1:(end - 1)]
-        dfr_big = dfr[1:(end - 1)]
+        dfr_big = df[1:(end - 1)]
     end
 
     fstat = abs.((NaN, (MSR1 ./ MSR2)...))
     pval = (NaN, ccdf.(FDist.(Δdf, dfr_big), fstat[2:end])...)
 
-    return FTestResult(Int(nobs(mods[1])), SSR, df, r2.(mods), fstat, pval)
+    return FTestResult(Int(nobs(mods[1])), SSR, float.(df), fstat, pval)
 end
 
 function show(io::IO, ftr::SingleFTestResult)
@@ -182,37 +179,34 @@ function show(io::IO, ftr::SingleFTestResult)
     return print(io, "p-value: ", PValue(ftr.pval))
 end
 
-function show(io::IO, ftr::FTestResult{N}) where {N}
+function show(io::IO, ::MIME"text/plain", ftr::FTestResult{N}) where {N}
     Δdof = abs.(_diff(ftr.dof))
     Δssr = abs.(_diff(ftr.ssr))
-    ΔR² = abs.(_diff(ftr.r2))
 
-    nc = 9
+    nc = 6
     nr = N
     outrows = Matrix{String}(undef, nr + 1, nc)
 
-    outrows[1, :] = ["", "DOF", "ΔDOF", "SSR", "ΔSSR",
-                     "R²", "ΔR²", "F*", "p(>F)"]
+    outrows[1, :] = ["SSR", "dof", "ΔSSR", "Δdof", "F*", "p(>F)"]
 
-    # get rid of negative zero -- doesn't matter mathematically,
-    # but messes up doctests and various other things
-    # cf. Issue #461
-    r2vals = [replace(@sprintf("%.4f", val), "-0.0000" => "0.0000") for val in ftr.r2]
-
-    outrows[2, :] = ["[1]", @sprintf("%.0d", ftr.dof[1]), " ",
-                     @sprintf("%.4f", ftr.ssr[1]), " ",
-                     r2vals[1], " ", " ", " "]
+    outrows[2, :] = [@sprintf("%.4f", ftr.ssr[1]),
+                     @sprintf("%.0d", ftr.dof[1]),
+                     " ",
+                     " ",
+                     " ",
+                     " "]
 
     for i in 2:nr
-        outrows[i + 1, :] = ["[$i]",
-                             @sprintf("%.0d", ftr.dof[i]), @sprintf("%.0d", Δdof[i - 1]),
-                             @sprintf("%.4f", ftr.ssr[i]), @sprintf("%.4f", Δssr[i - 1]),
-                             r2vals[i], @sprintf("%.4f", ΔR²[i - 1]),
-                             @sprintf("%.4f", ftr.fstat[i]), string(PValue(ftr.pval[i]))]
+        outrows[i + 1, :] = [@sprintf("%.4f", ftr.ssr[i]),
+                             @sprintf("%.0d", ftr.dof[i]),
+                             @sprintf("%.4f", Δssr[i - 1]),
+                             @sprintf("%.0d", Δdof[i - 1]),
+                             @sprintf("%.4f", ftr.fstat[i]),
+                             string(PValue(ftr.pval[i]))]
     end
     colwidths = length.(outrows)
     max_colwidths = [maximum(view(colwidths, :, i)) for i in 1:nc]
-    totwidth = sum(max_colwidths) + 2 * 8
+    totwidth = sum(max_colwidths) + 2 * (nc - 1)
 
     println(io, "F-test: $N models fitted on $(ftr.nobs) observations")
     println(io, '─'^totwidth)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1590,55 +1590,55 @@ end
     ft1a = ftest(mod, nullmod)
     @test isnan(ft1a.pval[1])
     @test ft1a.pval[2] ≈ 2.481215056713184e-8
-    @test sprint(show, ft1a) == """
+    @test sprint(show, "text/plain", ft1a) == """
         F-test: 2 models fitted on 12 observations
-        ────────────────────────────────────────────────────────────────
-             DOF  ΔDOF     SSR    ΔSSR      R²     ΔR²        F*   p(>F)
-        ────────────────────────────────────────────────────────────────
-        [1]    3        0.1283          0.9603                          
-        [2]    2     1  3.2292  3.1008  0.0000  0.9603  241.6234  <1e-07
-        ────────────────────────────────────────────────────────────────"""
+        ───────────────────────────────────────────
+           SSR  dof    ΔSSR  Δdof        F*   p(>F)
+        ───────────────────────────────────────────
+        0.1283   10                                
+        3.2292   11  3.1008     1  241.6234  <1e-07
+        ───────────────────────────────────────────"""
 
     ft1b = ftest(nullmod, mod)
     @test isnan(ft1b.pval[1])
     @test ft1b.pval[2] ≈ 2.481215056713184e-8
-    @test sprint(show, ft1b) == """
+    @test sprint(show, "text/plain", ft1b) == """
         F-test: 2 models fitted on 12 observations
-        ────────────────────────────────────────────────────────────────
-             DOF  ΔDOF     SSR    ΔSSR      R²     ΔR²        F*   p(>F)
-        ────────────────────────────────────────────────────────────────
-        [1]    2        3.2292          0.0000                          
-        [2]    3     1  0.1283  3.1008  0.9603  0.9603  241.6234  <1e-07
-        ────────────────────────────────────────────────────────────────"""
+        ───────────────────────────────────────────
+           SSR  dof    ΔSSR  Δdof        F*   p(>F)
+        ───────────────────────────────────────────
+        3.2292   11                                
+        0.1283   10  3.1008     1  241.6234  <1e-07
+        ───────────────────────────────────────────"""
 
     bigmod = lm(@formula(Result ~ Treatment + Other), d)
     ft2a = ftest(nullmod, mod, bigmod)
     @test isnan(ft2a.pval[1])
     @test ft2a.pval[2] ≈ 2.481215056713184e-8
     @test ft2a.pval[3] ≈ 0.3949973540194818
-    @test sprint(show, ft2a) == """
+    @test sprint(show, "text/plain", ft2a) == """
         F-test: 3 models fitted on 12 observations
-        ────────────────────────────────────────────────────────────────
-             DOF  ΔDOF     SSR    ΔSSR      R²     ΔR²        F*   p(>F)
-        ────────────────────────────────────────────────────────────────
-        [1]    2        3.2292          0.0000                          
-        [2]    3     1  0.1283  3.1008  0.9603  0.9603  241.6234  <1e-07
-        [3]    5     2  0.1017  0.0266  0.9685  0.0082    1.0456  0.3950
-        ────────────────────────────────────────────────────────────────"""
+        ───────────────────────────────────────────
+           SSR  dof    ΔSSR  Δdof        F*   p(>F)
+        ───────────────────────────────────────────
+        3.2292   11                                
+        0.1283   10  3.1008     1  241.6234  <1e-07
+        0.1017    8  0.0266     2    1.0456  0.3950
+        ───────────────────────────────────────────"""
 
     ft2b = ftest(bigmod, mod, nullmod)
     @test isnan(ft2b.pval[1])
     @test ft2b.pval[2] ≈ 0.3949973540194818
     @test ft2b.pval[3] ≈ 2.481215056713184e-8
-    @test sprint(show, ft2b) == """
+    @test sprint(show, "text/plain", ft2b) == """
         F-test: 3 models fitted on 12 observations
-        ────────────────────────────────────────────────────────────────
-             DOF  ΔDOF     SSR    ΔSSR      R²     ΔR²        F*   p(>F)
-        ────────────────────────────────────────────────────────────────
-        [1]    5        0.1017          0.9685                          
-        [2]    3     2  0.1283  0.0266  0.9603  0.0082    1.0456  0.3950
-        [3]    2     1  3.2292  3.1008  0.0000  0.9603  241.6234  <1e-07
-        ────────────────────────────────────────────────────────────────"""
+        ───────────────────────────────────────────
+           SSR  dof    ΔSSR  Δdof        F*   p(>F)
+        ───────────────────────────────────────────
+        0.1017    8                                
+        0.1283   10  0.0266     2    1.0456  0.3950
+        3.2292   11  3.1008     1  241.6234  <1e-07
+        ───────────────────────────────────────────"""
 
     @test_throws ArgumentError ftest(mod, bigmod, nullmod)
     @test_throws ArgumentError ftest(nullmod, bigmod, mod)