Make coeftable(::MatrixTerm) always return Vector{String} (#334)

andreasnoack · web-flow · commit f101d32419ca · 2025-12-23T11:03:10.000+01:00
diff --git a/src/terms.jl b/src/terms.jl
@@ -573,13 +573,13 @@ Return the name(s) of column(s) generated by a term.  Return value is either a
 See also [`termnames`](@ref).
 """
 StatsAPI.coefnames(t::FormulaTerm) = (coefnames(t.lhs), coefnames(t.rhs))
-StatsAPI.coefnames(::InterceptTerm{H}) where {H} = H ? "(Intercept)" : []
+StatsAPI.coefnames(::InterceptTerm{H}) where {H} = H ? ["(Intercept)"] : String[]
 StatsAPI.coefnames(t::ContinuousTerm) = string(t.sym)
 StatsAPI.coefnames(t::CategoricalTerm) =
     ["$(t.sym): $name" for name in t.contrasts.coefnames]
 StatsAPI.coefnames(t::FunctionTerm) = string(t.exorig)
 StatsAPI.coefnames(ts::TupleTerm) = reduce(vcat, coefnames.(ts))
-StatsAPI.coefnames(t::MatrixTerm) = mapreduce(coefnames, vcat, t.terms)
+StatsAPI.coefnames(t::MatrixTerm) = mapreduce(coefnames, vcat, t.terms; init = String[])
 StatsAPI.coefnames(t::InteractionTerm) =
     kron_insideout((args...) -> join(args, " & "), vectorize.(coefnames.(t.terms))...)
 
diff --git a/test/modelmatrix.jl b/test/modelmatrix.jl
@@ -1,5 +1,5 @@
 @testset "Model matrix" begin
-    
+
     using StatsBase: StatisticalModel
 
     using SparseArrays, DataFrames, Tables
@@ -14,7 +14,7 @@
     d.x1p = categorical(d.x1)
 
     d_orig = deepcopy(d)
-    
+
     x1 = [5.:8;]
     x2 = [9.:12;]
     x3 = [13.:16;]
@@ -161,8 +161,8 @@
                       z = repeat([:e, :f], inner = 4))
         cs = Dict([Symbol(name) => EffectsCoding() for name in names(d)])
         d.n = 1.:8
-    
-    
+
+
         ## No intercept
         mf = ModelFrame(@formula(n ~ 0 + x), d, contrasts=cs)
         mm = ModelMatrix(mf)
@@ -182,8 +182,8 @@
         mm = ModelMatrix(mf)
         @test all(mm.m .== ifelse.(d.x .== :a, -1, 1))
         @test coefnames(mf) == ["x: b"]
-        
-    
+
+
         ## No first-order term for interaction
         mf = ModelFrame(@formula(n ~ 1 + x + x&y), d, contrasts=cs)
         mm = ModelMatrix(mf)
@@ -197,7 +197,7 @@
                                  1  0  1]
         @test mm.m == ModelMatrix{sparsetype}(mf).m
         @test coefnames(mf) == ["(Intercept)", "x: b", "x: a & y: d", "x: b & y: d"]
-    
+
         ## When both terms of interaction are non-redundant:
         mf = ModelFrame(@formula(n ~ 0 + x&y), d, contrasts=cs)
         mm = ModelMatrix(mf)
@@ -218,7 +218,7 @@
         mm = ModelMatrix(mf)
         @test mm.m == Matrix(1.0I, 8, 8)
         @test mm.m == ModelMatrix{sparsetype}(mf).m
-    
+
         # two two-way interactions, with no lower-order term. both are promoted in
         # first (both x and y), but only the old term (x) in the second (because
         # dropping x gives z which isn't found elsewhere, but dropping z gives x
@@ -237,7 +237,7 @@
         @test coefnames(mf) == ["x: a & y: c", "x: b & y: c",
                                 "x: a & y: d", "x: b & y: d",
                                 "x: a & z: f", "x: b & z: f"]
-    
+
         # ...and adding a three-way interaction, only the shared term (x) is promoted.
         # this is because dropping x gives y&z which isn't present, but dropping y or z
         # gives x&z or x&z respectively, which are both present.
@@ -256,7 +256,7 @@
                                 "x: a & y: d", "x: b & y: d",
                                 "x: a & z: f", "x: b & z: f",
                                 "x: a & y: d & z: f", "x: b & y: d & z: f"]
-    
+
         # two two-way interactions, with common lower-order term. the common term x is
         # promoted in both (along with lower-order term), because in every case, when
         # x is dropped, the remaining terms (1, y, and z) aren't present elsewhere.
@@ -274,8 +274,8 @@
         @test coefnames(mf) == ["x: a", "x: b",
                                 "x: a & y: d", "x: b & y: d",
                                 "x: a & z: f", "x: b & z: f"]
-    
-    
+
+
         ## FAILS: When both terms are non-redundant and intercept is PRESENT
         ## (not fully redundant). Ideally, would drop last column. Might make sense
         ## to warn about this, and suggest recoding x and y into a single variable.
@@ -286,7 +286,7 @@
                                            1 0 0 0]
         @test_broken coefnames(mf) == ["x: a & y: c", "x: b & y: c",
                                        "x: a & y: d", "x: b & y: d"]
-    
+
         ## note that R also does not detect this automatically. it's left to glm et al.
         ## to detect numerically when the model matrix is rank deficient, which is hard
         ## to do correctly.
@@ -343,7 +343,7 @@
                       x = repeat([:a, :b], outer = 4),
                       y = repeat([:c, :d], inner = 2, outer = 2),
                       z = repeat([:e, :f], inner = 4))
-    
+
         f = apply_schema(@formula(r ~ 1 + w*x*y*z), schema(d))
         modelmatrix(f, d)
         @test reduce(vcat, last.(modelcols.(Ref(f), Tables.rowtable(d)))') == modelmatrix(f,d)
@@ -355,7 +355,7 @@
                       x = repeat([:a, :b], outer = 4),
                       y = repeat([:c, :d], inner = 2, outer = 2),
                       z = repeat([:e, :f], inner = 4))
-    
+
         f = @formula(r ~ 1 + w*x*y*z)
 
         mm1 = modelmatrix(f, d)
@@ -375,19 +375,19 @@
                    C=repeat(['L','H'], inner=4))
 
             contrasts = Dict(:A=>HelmertCoding(), :B=>HelmertCoding(), :C=>HelmertCoding())
-                                                  
-                                                  
+
+
 
             mf = ModelFrame(@formula(Y ~ 1 + A*B*C), tbl)
             mf_helm = ModelFrame(@formula(Y ~ 1 + A*B*C), tbl, contrasts = contrasts)
 
             @test size(modelmatrix(mf)) == size(modelmatrix(mf_helm))
-            
+
             mf_helm2 = setcontrasts!(ModelFrame(@formula(Y ~ 1 + A*B*C), tbl), contrasts)
 
             @test size(modelmatrix(mf)) == size(modelmatrix(mf_helm2))
             @test modelmatrix(mf_helm) == modelmatrix(mf_helm2)
-            
+
         end
     end
 
@@ -402,5 +402,15 @@
         f = apply_schema(@formula(0 ~ a&b&c), schema(t))
         @test vec(modelcols(f.rhs, t)) == modelcols.(Ref(f.rhs), Tables.rowtable(t))
     end
-    
+
+    @testset "#112. coefnames should return same type for all rhs: $(f)" for f in [
+        @formula(y ~ 1),
+        @formula(y ~ x1 + 0),
+        @formula(y ~ x1),
+        @formula(y ~ x1 + x2),
+    ]
+        df = (y = [1.0, 1.0], x1 = [1, 2], x2 = ["A", "B"])
+        _f = apply_schema(f, schema(f, df))
+        @test coefnames(_f.rhs) isa Vector{String}
+    end
 end
diff --git a/test/temporal_terms.jl b/test/temporal_terms.jl
@@ -26,7 +26,7 @@ using DataStructures
             resp, pred = modelcols(f, df)
 
             @test isequal(pred[:, 1], [missing; 1.0:9])
-            @test coefnames(f)[2] == "x_lag1"
+            @test coefnames(f)[2] == ["x_lag1"]
         end
 
         @testset "Row Table" begin
@@ -53,7 +53,7 @@ using DataStructures
             resp, pred = modelcols(neg_f, df);
 
             @test isequal(pred[:, 1], [3.0:10; missing; missing])
-            @test coefnames(neg_f)[2] == "x_lag-2"
+            @test coefnames(neg_f)[2] == ["x_lag-2"]
         end
 
         @testset "Categorical Term use" begin
@@ -184,7 +184,5 @@ using DataStructures
                 @test coefnames(t1) == coefnames(t2) == coefnames(t3)
             end
         end
-
-        
     end
 end