Skip to content

Commit 81a07a1

Browse files
authored
Improve printing CoefTable (#481)
Rely on the printing code from Base to align numbers on the decimal separator. Unfortunately, column names cannot be treated as a normal row, since they would then be aligned to the left of the decimal separator, which wastes space and looks less clean. Instead, adjust the alignment manually so that columns are wide enough to contain their names. Convert values to PValue only at printing time, so that users can still access the contents of columns as normal numbers; this is particularly convenient for testing in packages. Print test statistics with 2 decimals. Add horizontal lines.
1 parent b7fe44c commit 81a07a1

File tree

2 files changed

+82
-57
lines changed

2 files changed

+82
-57
lines changed

src/statmodels.jl

Lines changed: 58 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -358,36 +358,41 @@ function params! end
358358

359359
## coefficient tables with specialized show method
360360

361-
## Nms are the coefficient names, corresponding to rows in the table
362361
mutable struct CoefTable
363362
cols::Vector
364363
colnms::Vector
365364
rownms::Vector
366-
function CoefTable(cols::Vector,colnms::Vector,rownms::Vector)
365+
pvalcol::Int
366+
teststatcol::Int
367+
function CoefTable(cols::Vector,colnms::Vector,rownms::Vector,
368+
pvalcol::Int=0,teststatcol::Int=0)
367369
nc = length(cols)
368370
nrs = map(length,cols)
369371
nr = nrs[1]
370-
length(colnms) in [0,nc] || error("colnms should have length 0 or $nc")
371-
length(rownms) in [0,nr] || error("rownms should have length 0 or $nr")
372-
all(nrs .== nr) || error("Elements of cols should have equal lengths, but got $nrs")
373-
new(cols,colnms,rownms)
372+
length(colnms) in [0,nc] || throw(ArgumentError("colnms should have length 0 or $nc"))
373+
length(rownms) in [0,nr] || throw(ArgumentError("rownms should have length 0 or $nr"))
374+
all(nrs .== nr) || throw(ArgumentError("Elements of cols should have equal lengths, but got $nrs"))
375+
pvalcol in 0:nc || throw(ArgumentError("pvalcol should be between 0 and $nc"))
376+
teststatcol in 0:nc || throw(ArgumentError("teststatcol should be between 0 and $nc"))
377+
new(cols,colnms,rownms,pvalcol,teststatcol)
374378
end
375379

376-
function CoefTable(mat::Matrix,colnms::Vector,rownms::Vector,pvalcol::Int=0)
380+
function CoefTable(mat::Matrix,colnms::Vector,rownms::Vector,
381+
pvalcol::Int=0,teststatcol::Int=0)
377382
nc = size(mat,2)
378383
cols = Any[mat[:, i] for i in 1:nc]
379-
if pvalcol != 0 # format the p-values column
380-
cols[pvalcol] = [PValue(cols[pvalcol][j])
381-
for j in eachindex(cols[pvalcol])]
382-
end
383-
CoefTable(cols,colnms,rownms)
384+
CoefTable(cols,colnms,rownms,pvalcol,teststatcol)
384385
end
385386
end
386387

387-
mutable struct PValue
388-
v::Number
389-
function PValue(v::Number)
390-
0. <= v <= 1. || isnan(v) || error("p-values must be in [0.,1.]")
388+
"""
389+
Show a p-value using 6 characters, either using the standard 0.XXXX
390+
representation or as <Xe-YY.
391+
"""
392+
struct PValue
393+
v::Real
394+
function PValue(v::Real)
395+
0 <= v <= 1 || isnan(v) || error("p-values must be in [0; 1]")
391396
new(v)
392397
end
393398
end
@@ -403,36 +408,52 @@ function show(io::IO, pv::PValue)
403408
end
404409
end
405410

411+
"""Show a test statistic using 2 decimal digits"""
412+
struct TestStat <: Real
413+
v::Real
414+
end
415+
416+
show(io::IO, x::TestStat) = @printf(io, "%.2f", x.v)
417+
418+
"""Wrap a string so that show omits quotes"""
419+
struct NoQuote
420+
s::String
421+
end
422+
423+
show(io::IO, n::NoQuote) = print(io, n.s)
424+
406425
function show(io::IO, ct::CoefTable)
407426
cols = ct.cols; rownms = ct.rownms; colnms = ct.colnms;
408427
nc = length(cols)
409428
nr = length(cols[1])
410429
if length(rownms) == 0
411430
rownms = [lpad("[$i]",floor(Integer, log10(nr))+3) for i in 1:nr]
412431
end
413-
rnwidth = max(4,maximum([length(nm) for nm in rownms]) + 1)
414-
rownms = [rpad(nm,rnwidth) for nm in rownms]
415-
widths = [length(cn)::Int for cn in colnms]
416-
str = String[isa(cols[j][i], AbstractString) ? cols[j][i] :
417-
sprint(show, cols[j][i], context=:compact=>true) for i in 1:nr, j in 1:nc]
418-
for j in 1:nc
419-
for i in 1:nr
420-
lij = length(str[i,j])
421-
if lij > widths[j]
422-
widths[j] = lij
423-
end
424-
end
432+
mat = [j == 1 ? NoQuote(rownms[i]) :
433+
j-1 == ct.pvalcol ? PValue(cols[j-1][i]) :
434+
j-1 in ct.teststatcol ? TestStat(cols[j-1][i]) :
435+
cols[j-1][i] isa AbstractString ? NoQuote(cols[j-1][i]) : cols[j-1][i]
436+
for i in 1:nr, j in 1:nc+1]
437+
# Code inspired by print_matrix in Base
438+
io = IOContext(io, :compact=>true, :limit=>false)
439+
A = Base.alignment(io, mat, 1:size(mat, 1), 1:size(mat, 2),
440+
typemax(Int), typemax(Int), 3)
441+
nmswidths = pushfirst!(length.(colnms), 0)
442+
A = [nmswidths[i] > sum(A[i]) ? (A[i][1]+nmswidths[i]-sum(A[i]), A[i][2]) : A[i]
443+
for i in 1:length(A)]
444+
totwidth = sum(sum.(A)) + 2 * (length(A) - 1)
445+
println(io, repeat('─', totwidth))
446+
print(io, repeat(' ', sum(A[1])))
447+
for j in 1:length(colnms)
448+
print(io, " ", lpad(colnms[j], sum(A[j+1])))
425449
end
426-
widths .+= 1
427-
println(io," " ^ rnwidth *
428-
join([lpad(string(colnms[i]), widths[i]) for i = 1:nc], ""))
429-
for i = 1:nr
430-
print(io, rownms[i])
431-
for j in 1:nc
432-
print(io, lpad(str[i,j],widths[j]))
433-
end
434-
println(io)
450+
println(io, '\n', repeat('─', totwidth))
451+
for i in 1:size(mat, 1)
452+
Base.print_matrix_row(io, mat, A, i, 1:size(mat, 2), " ")
453+
i != size(mat, 1) && println(io)
435454
end
455+
print(io, '\n', repeat('─', totwidth))
456+
nothing
436457
end
437458

438459
"""

test/statmodels.jl

Lines changed: 24 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,32 @@
11
using StatsBase
22
using Test, Random
33

4-
Random.seed!(10)
5-
v1 = rand(3)
4+
v1 = [1.45666, -23.14, 1.56734e-13]
65
v2 = ["Good", "Great", "Bad"]
7-
v3 = rand(Int8, 3)
8-
v4 = [StatsBase.PValue(rand()./10000) for i in 1:3]
9-
m = rand(3,4)
10-
@test sprint(show, CoefTable(Any[v1, v2, v3, v4],
11-
["Estimate", "Comments", "df", "p"],
12-
["x1", "x2", "x3"])) == """
13-
Estimate Comments df p
14-
x1 0.112582 Good 88 <1e-4
15-
x2 0.368314 Great -90 <1e-4
16-
x3 0.344454 Bad -80 <1e-4
17-
"""
6+
v3 = [1, 56, 2]
7+
v4 = [-12.56, 0.1326, 2.68e-16]
8+
v5 = [0.12, 0.3467, 1.345e-16]
9+
@test sprint(show, CoefTable(Any[v1, v2, v3, v4, v5],
10+
["Estimate", "Comments", "df", "t", "p"],
11+
["x1", "x2", "x3"], 5, 4)) == """
12+
───────────────────────────────────────────────
13+
Estimate Comments df t p
14+
───────────────────────────────────────────────
15+
x1 1.45666 Good 1 -12.56 0.1200
16+
x2 -23.14 Great 56 0.13 0.3467
17+
x3 1.56734e-13 Bad 2 0.00 <1e-15
18+
───────────────────────────────────────────────"""
1819

19-
@test sprint(show, CoefTable(m, ["Estimate", "Stderror", "df", "p"],
20-
["x1", "x2", "x3"], 4)) == """
21-
Estimate Stderror df p
22-
x1 0.819778 0.844007 0.923676 0.1717
23-
x2 0.669931 0.67919 0.066098 0.4204
24-
x3 0.453058 0.72525 0.999172 0.5567
25-
"""
20+
Random.seed!(10)
21+
m = rand(3,4)
22+
@test sprint(show, CoefTable(m, ["Estimate", "Stderror", "df", "p"], [], 4)) == """
23+
──────────────────────────────────────────
24+
Estimate Stderror df p
25+
──────────────────────────────────────────
26+
[1] 0.112582 0.0566454 0.381813 0.8198
27+
[2] 0.368314 0.120781 0.815104 0.6699
28+
[3] 0.344454 0.179574 0.242208 0.4531
29+
──────────────────────────────────────────"""
2630

2731
@test sprint(show, StatsBase.PValue(1.0)) == "1.0000"
2832
@test sprint(show, StatsBase.PValue(1e-1)) == "0.1000"

0 commit comments

Comments
 (0)