From 5651eb3819d303eab84d5a164c77c7fd76e60551 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Sat, 20 Dec 2025 14:57:05 +0100 Subject: [PATCH 1/4] Update to Aqua@0.8 --- Project.toml | 6 +++++- test/runtests.jl | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/Project.toml b/Project.toml index 2fe9587e..58fbc604 100644 --- a/Project.toml +++ b/Project.toml @@ -16,12 +16,16 @@ StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] -Aqua = "0.7" +Aqua = "0.8" CategoricalArrays = "0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.10" DataAPI = "1.1" DataFrames = "1" DataStructures = "0.17, 0.18, 0.19" +LinearAlgebra = "<0.0.1, 1" +Printf = "<0.0.1, 1" +REPL = "<0.0.1, 1" ShiftedArrays = "1, 2" +SparseArrays = "<0.0.1, 1" Statistics = "1" StatsAPI = "1.7" StatsBase = "0.33.5, 0.34" diff --git a/test/runtests.jl b/test/runtests.jl index 2ab91651..4b240cee 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -32,7 +32,7 @@ my_tests = ["ambiguity.jl", # because VIF and GVIF are defined in StatsAPI for RegressionModel, # which is also defined there, it's flagged as piracy. But # we're the offical implementers so it's privateering. 
- Aqua.test_all(StatsModels; ambiguities=false, piracy=(treat_as_own=[vif, gvif],)) + Aqua.test_all(StatsModels; ambiguities=false, piracies=(treat_as_own=[vif, gvif],),) end for tf in my_tests From 2d02c9a71655718aa75525173308bf733cce8ca4 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Sat, 20 Dec 2025 16:04:31 +0100 Subject: [PATCH 2/4] Test on pre instead of nightly --- .github/workflows/ci-future.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-future.yml b/.github/workflows/ci-future.yml index f2667437..18b44994 100644 --- a/.github/workflows/ci-future.yml +++ b/.github/workflows/ci-future.yml @@ -17,7 +17,7 @@ jobs: fail-fast: false matrix: version: - - 'nightly' + - 'pre' os: - ubuntu-latest arch: From 143a2d223ff470aacbeb69d28af9cef9427894fe Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Tue, 23 Dec 2025 11:02:14 +0100 Subject: [PATCH 3/4] Update runtests.jl Co-authored-by: Milan Bouchet-Valat --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 4b240cee..a20e73df 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -32,7 +32,7 @@ my_tests = ["ambiguity.jl", # because VIF and GVIF are defined in StatsAPI for RegressionModel, # which is also defined there, it's flagged as piracy. But # we're the offical implementers so it's privateering. 
- Aqua.test_all(StatsModels; ambiguities=false, piracies=(treat_as_own=[vif, gvif],),) + Aqua.test_all(StatsModels; ambiguities=false, piracies=(treat_as_own=[vif, gvif],)) end for tf in my_tests From f101d32419cacfa8e6f765895103d3cc4665afc7 Mon Sep 17 00:00:00 2001 From: Andreas Noack Date: Tue, 23 Dec 2025 11:03:10 +0100 Subject: [PATCH 4/4] Make coefnames(::MatrixTerm) always return Vector{String} (#334) --- src/terms.jl | 4 ++-- test/modelmatrix.jl | 50 +++++++++++++++++++++++++----------------- test/temporal_terms.jl | 6 ++--- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/src/terms.jl b/src/terms.jl index 188120b1..72bab08f 100644 --- a/src/terms.jl +++ b/src/terms.jl @@ -573,13 +573,13 @@ Return the name(s) of column(s) generated by a term. Return value is either a See also [`termnames`](@ref). """ StatsAPI.coefnames(t::FormulaTerm) = (coefnames(t.lhs), coefnames(t.rhs)) -StatsAPI.coefnames(::InterceptTerm{H}) where {H} = H ? "(Intercept)" : [] +StatsAPI.coefnames(::InterceptTerm{H}) where {H} = H ? ["(Intercept)"] : String[] StatsAPI.coefnames(t::ContinuousTerm) = string(t.sym) StatsAPI.coefnames(t::CategoricalTerm) = ["$(t.sym): $name" for name in t.contrasts.coefnames] StatsAPI.coefnames(t::FunctionTerm) = string(t.exorig) StatsAPI.coefnames(ts::TupleTerm) = reduce(vcat, coefnames.(ts)) -StatsAPI.coefnames(t::MatrixTerm) = mapreduce(coefnames, vcat, t.terms) +StatsAPI.coefnames(t::MatrixTerm) = mapreduce(coefnames, vcat, t.terms; init = String[]) StatsAPI.coefnames(t::InteractionTerm) = kron_insideout((args...) -> join(args, " & "), vectorize.(coefnames.(t.terms))...) 
diff --git a/test/modelmatrix.jl b/test/modelmatrix.jl index efa1ff2f..6997e0ed 100644 --- a/test/modelmatrix.jl +++ b/test/modelmatrix.jl @@ -1,5 +1,5 @@ @testset "Model matrix" begin - + using StatsBase: StatisticalModel using SparseArrays, DataFrames, Tables @@ -14,7 +14,7 @@ d.x1p = categorical(d.x1) d_orig = deepcopy(d) - + x1 = [5.:8;] x2 = [9.:12;] x3 = [13.:16;] @@ -161,8 +161,8 @@ z = repeat([:e, :f], inner = 4)) cs = Dict([Symbol(name) => EffectsCoding() for name in names(d)]) d.n = 1.:8 - - + + ## No intercept mf = ModelFrame(@formula(n ~ 0 + x), d, contrasts=cs) mm = ModelMatrix(mf) @@ -182,8 +182,8 @@ mm = ModelMatrix(mf) @test all(mm.m .== ifelse.(d.x .== :a, -1, 1)) @test coefnames(mf) == ["x: b"] - - + + ## No first-order term for interaction mf = ModelFrame(@formula(n ~ 1 + x + x&y), d, contrasts=cs) mm = ModelMatrix(mf) @@ -197,7 +197,7 @@ 1 0 1] @test mm.m == ModelMatrix{sparsetype}(mf).m @test coefnames(mf) == ["(Intercept)", "x: b", "x: a & y: d", "x: b & y: d"] - + ## When both terms of interaction are non-redundant: mf = ModelFrame(@formula(n ~ 0 + x&y), d, contrasts=cs) mm = ModelMatrix(mf) @@ -218,7 +218,7 @@ mm = ModelMatrix(mf) @test mm.m == Matrix(1.0I, 8, 8) @test mm.m == ModelMatrix{sparsetype}(mf).m - + # two two-way interactions, with no lower-order term. both are promoted in # first (both x and y), but only the old term (x) in the second (because # dropping x gives z which isn't found elsewhere, but dropping z gives x @@ -237,7 +237,7 @@ @test coefnames(mf) == ["x: a & y: c", "x: b & y: c", "x: a & y: d", "x: b & y: d", "x: a & z: f", "x: b & z: f"] - + # ...and adding a three-way interaction, only the shared term (x) is promoted. # this is because dropping x gives y&z which isn't present, but dropping y or z # gives x&z or x&z respectively, which are both present. 
@@ -256,7 +256,7 @@ "x: a & y: d", "x: b & y: d", "x: a & z: f", "x: b & z: f", "x: a & y: d & z: f", "x: b & y: d & z: f"] - + # two two-way interactions, with common lower-order term. the common term x is # promoted in both (along with lower-order term), because in every case, when # x is dropped, the remaining terms (1, y, and z) aren't present elsewhere. @@ -274,8 +274,8 @@ @test coefnames(mf) == ["x: a", "x: b", "x: a & y: d", "x: b & y: d", "x: a & z: f", "x: b & z: f"] - - + + ## FAILS: When both terms are non-redundant and intercept is PRESENT ## (not fully redundant). Ideally, would drop last column. Might make sense ## to warn about this, and suggest recoding x and y into a single variable. @@ -286,7 +286,7 @@ 1 0 0 0] @test_broken coefnames(mf) == ["x: a & y: c", "x: b & y: c", "x: a & y: d", "x: b & y: d"] - + ## note that R also does not detect this automatically. it's left to glm et al. ## to detect numerically when the model matrix is rank deficient, which is hard ## to do correctly. 
@@ -343,7 +343,7 @@ x = repeat([:a, :b], outer = 4), y = repeat([:c, :d], inner = 2, outer = 2), z = repeat([:e, :f], inner = 4)) - + f = apply_schema(@formula(r ~ 1 + w*x*y*z), schema(d)) modelmatrix(f, d) @test reduce(vcat, last.(modelcols.(Ref(f), Tables.rowtable(d)))') == modelmatrix(f,d) @@ -355,7 +355,7 @@ x = repeat([:a, :b], outer = 4), y = repeat([:c, :d], inner = 2, outer = 2), z = repeat([:e, :f], inner = 4)) - + f = @formula(r ~ 1 + w*x*y*z) mm1 = modelmatrix(f, d) @@ -375,19 +375,19 @@ C=repeat(['L','H'], inner=4)) contrasts = Dict(:A=>HelmertCoding(), :B=>HelmertCoding(), :C=>HelmertCoding()) - - + + mf = ModelFrame(@formula(Y ~ 1 + A*B*C), tbl) mf_helm = ModelFrame(@formula(Y ~ 1 + A*B*C), tbl, contrasts = contrasts) @test size(modelmatrix(mf)) == size(modelmatrix(mf_helm)) - + mf_helm2 = setcontrasts!(ModelFrame(@formula(Y ~ 1 + A*B*C), tbl), contrasts) @test size(modelmatrix(mf)) == size(modelmatrix(mf_helm2)) @test modelmatrix(mf_helm) == modelmatrix(mf_helm2) - + end end @@ -402,5 +402,15 @@ f = apply_schema(@formula(0 ~ a&b&c), schema(t)) @test vec(modelcols(f.rhs, t)) == modelcols.(Ref(f.rhs), Tables.rowtable(t)) end - + + @testset "#112. 
coefnames should return same type for all rhs: $(f)" for f in [ + @formula(y ~ 1), + @formula(y ~ x1 + 0), + @formula(y ~ x1), + @formula(y ~ x1 + x2), + ] + df = (y = [1.0, 1.0], x1 = [1, 2], x2 = ["A", "B"]) + _f = apply_schema(f, schema(f, df)) + @test coefnames(_f.rhs) isa Vector{String} + end end diff --git a/test/temporal_terms.jl b/test/temporal_terms.jl index 2618f5c7..44228013 100644 --- a/test/temporal_terms.jl +++ b/test/temporal_terms.jl @@ -26,7 +26,7 @@ using DataStructures resp, pred = modelcols(f, df) @test isequal(pred[:, 1], [missing; 1.0:9]) - @test coefnames(f)[2] == "x_lag1" + @test coefnames(f)[2] == ["x_lag1"] end @testset "Row Table" begin @@ -53,7 +53,7 @@ using DataStructures resp, pred = modelcols(neg_f, df); @test isequal(pred[:, 1], [3.0:10; missing; missing]) - @test coefnames(neg_f)[2] == "x_lag-2" + @test coefnames(neg_f)[2] == ["x_lag-2"] end @testset "Categorical Term use" begin @@ -184,7 +184,5 @@ using DataStructures @test coefnames(t1) == coefnames(t2) == coefnames(t3) end end - - end end