Skip to content

Commit 688561c

Browse files
Slightly nicer error messages when a variable is missing in the data table. (#235)
* Attempt to make nicer error messages when a variable has been misnamed
* attempt shorter stack trace
* cleanup comments
* split error stuff out to its own file and apply to `concrete_terms`.
* typo
* Make slightly nicer error messages when creating concrete_terms from tables.
* Update src/schema.jl
  Co-authored-by: Dave Kleinschmidt <[email protected]>
* Update src/schema.jl
  Co-authored-by: Dave Kleinschmidt <[email protected]>
* Update src/errormessages.jl
  Co-authored-by: Dave Kleinschmidt <[email protected]>
* Update src/errormessages.jl
  Co-authored-by: Dave Kleinschmidt <[email protected]>
* Update src/errormessages.jl
  Co-authored-by: Dave Kleinschmidt <[email protected]>
* Update src/modelframe.jl
  Co-authored-by: Dave Kleinschmidt <[email protected]>

Co-authored-by: Dave Kleinschmidt <[email protected]>
1 parent e3f6ea1 commit 688561c

File tree

8 files changed

+85
-8
lines changed

8 files changed

+85
-8
lines changed

Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
77
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
88
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
99
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
10+
REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
1011
ShiftedArrays = "1277b4bf-5013-50f5-be3d-901d8477a67a"
1112
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
1213
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"

src/StatsModels.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ using LinearAlgebra
1515

1616
using Tables: ColumnTable
1717

18+
using REPL: levenshtein
19+
1820
export
1921
#re-export from StatsBase:
2022
StatisticalModel,
@@ -67,6 +69,7 @@ export
6769
include("traits.jl")
6870
include("contrasts.jl")
6971
include("terms.jl")
72+
include( "errormessages.jl")
7073
include("schema.jl")
7174
include("temporal_terms.jl")
7275
include("formula.jl")

src/errormessages.jl

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
2+
"""
3+
This is borrowed from [DataFrames.jl](https://github.com/JuliaData/DataFrames.jl).
4+
Return the names from `colnames` closest to `name` (case-insensitive edit distance of at most 2), nearest first; the result may be empty.
5+
`colnames` : some iterable collection of symbols
6+
"""
7+
function fuzzymatch(colnames, name::Symbol; maxdist::Integer=2, maxnames::Integer=8)
    # Suggest the entries of `colnames` that look most like `name`.
    #
    # Closeness is the Levenshtein edit distance between the upper-cased string forms,
    # so the comparison is case-insensitive.
    #
    # Keywords (the defaults reproduce the previously hard-coded values):
    #   maxdist  - largest edit distance still treated as a near match (non-negative)
    #   maxnames - how many names a cut-off may admit before a tighter one is tried
    #
    # Returns a vector of the matching names ordered by `sort!` on (distance, name)
    # tuples; it is empty when nothing is close enough.
    # Throws `ArgumentError` if `maxdist` is negative.
    maxdist >= 0 || throw(ArgumentError("maxdist must be non-negative, got $maxdist"))

    ucname = uppercase(string(name))
    dist = [(levenshtein(uppercase(string(x)), ucname), x) for x in colnames]
    sort!(dist)

    # c[i + 1] counts the names within edit distance i. The counts never decrease as i
    # grows, which is what makes the sorted search below valid.
    c = [count(x -> x[1] <= i, dist) for i in 0:maxdist]

    # Widest cut-off that admits at most `maxnames` names; falls back to 0 (exact,
    # case-insensitive matches only) when even distance 0 admits too many.
    maxd = max(0, searchsortedlast(c, maxnames) - 1)
    return [s for (d, s) in dist if d <= maxd]
end
15+
16+
"""
17+
Return a nice-ish error message if the Symbol `name` isn't a column name in `table`, otherwise a zero-length string.
18+
"""
19+
function checkcol(table, name::Symbol)
    # Returns "" when `name` is a column of `table`; otherwise returns a readable
    # message naming the missing variable plus any similarly spelled columns.
    i = Tables.columnindex(table, name)
    # an index of 0 means there is no such column
    i == 0 || return ""

    nearestnames = join(fuzzymatch(Tables.columnnames(table), name), ", ")
    msg = "There isn't a variable called '$name' in your data"
    # With no close candidates, leave the suggestion clause out instead of ending the
    # message with an empty list.
    isempty(nearestnames) && return msg
    return "$msg; the nearest names appear to be: $nearestnames"
end
28+
29+
"""
30+
Check that each variable name used in the given model `f` exists in the data source `t`, and return an error message for the first one that does not. Return a zero-length string otherwise.
31+
`t` is something that implements the `Tables` interface.
32+
"""
33+
function checknamesexist(f::FormulaTerm, t)
    # Anything that is not a Tables.jl table is rejected up front.
    Tables.istable(t) || throw(ArgumentError("$(typeof(t)) isn't a valid Table type"))

    # Report the first formula variable that has no matching column.
    for var in StatsModels.termvars(f)
        problem = checkcol(t, var)
        isempty(problem) || return problem
    end

    # every variable was found
    return ""
end

src/modelframe.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,14 @@ end
6868

6969
function missing_omit(data::T, formula::AbstractTerm) where T<:ColumnTable
    # Narrow the table to the columns the formula refers to, then defer to the
    # `missing_omit` method for that narrowed table.
    vars = tuple(termvars(formula)...)
    return missing_omit(NamedTuple{vars}(data))
end
71-
7271
function ModelFrame(f::FormulaTerm, data::ColumnTable;
7372
model::Type{M}=StatisticalModel, contrasts=Dict{Symbol,Any}()) where M
73+
74+
msg = checknamesexist( f, data )
75+
if msg != ""
76+
throw(ArgumentError(msg))
77+
end
78+
7479
data, _ = missing_omit(data, f)
7580

7681
sch = schema(f, data, contrasts)

src/schema.jl

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,12 +166,25 @@ julia> concrete_term(term(:a), (a = [1, 2, 3], b = [0.0, 0.5, 1.0]))
166166
a(continuous)
167167
```
168168
"""
169-
concrete_term(t::Term, d, hints::Dict{Symbol}) =
170-
concrete_term(t, d, get(hints, t.sym, nothing))
171-
concrete_term(t::Term, dt::ColumnTable, hint) =
172-
concrete_term(t, getproperty(dt, t.sym), hint)
173-
concrete_term(t::Term, dt::ColumnTable, hints::Dict{Symbol}) =
174-
concrete_term(t, getproperty(dt, t.sym), get(hints, t.sym, nothing))
169+
function concrete_term(t::Term, d, hints::Dict{Symbol})
    # Pick out the hint registered for this term's symbol (`nothing` when there is
    # none) and hand over to the single-hint method.
    hint = get(hints, t.sym, nothing)
    return concrete_term(t, d, hint)
end
170+
171+
function concrete_term(t::Term, dt::ColumnTable, hint)
    # Check the column exists before touching it, so a misnamed variable produces the
    # readable `checkcol` message as an ArgumentError.
    problem::String = checkcol(dt, t.sym)
    isempty(problem) || throw(ArgumentError(problem))

    column = getproperty(dt, t.sym)
    return concrete_term(t, column, hint)
end
178+
179+
function concrete_term(t::Term, dt::ColumnTable, hints::Dict{Symbol})
    # Check the column exists before touching it, so a misnamed variable produces the
    # readable `checkcol` message as an ArgumentError.
    problem::String = checkcol(dt, t.sym)
    isempty(problem) || throw(ArgumentError(problem))

    column = getproperty(dt, t.sym)
    # `nothing` when no hint was supplied for this term's symbol
    hint = get(hints, t.sym, nothing)
    return concrete_term(t, column, hint)
end
186+
187+
175188
function concrete_term(t::Term, d)
    # No hint given: forward to the three-argument method with `nothing` as the hint.
    return concrete_term(t, d, nothing)
end
176189

177190
# if the "hint" is already an AbstractTerm, use that

test/modelframe.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
mf = ModelFrame(f, df)
99
mm = ModelMatrix(mf)
1010
@test mm.assign == [1, 2, 2, 3, 4, 4]
11+
12+
# `q` isn't in df - should throw ArgumentError
13+
f2 = @formula(x ~ y * q)
14+
@test_throws ArgumentError ModelFrame(f2, df)
15+
1116
end
1217

1318
end

test/statsmodel.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ Base.show(io::IO, m::DummyModTwo) = println(io, m.msg)
189189
@test predict(m, d4) == predict(m, d)
190190

191191
## attempting to fit with d4 should fail since it doesn't have :y
192-
@test_throws ErrorException fit(DummyMod, f, d4)
192+
@test_throws ArgumentError fit(DummyMod, f, d4)
193193

194194
## fit with contrasts specified
195195
d.x2p = categorical(d.x2)

test/terms.jl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,4 +210,10 @@ StatsModels.apply_schema(mt::MultiTerm, sch::StatsModels.Schema, Mod::Type) =
210210
@test "$((a, ()))" == "(a, ())"
211211
end
212212

213+
@testset "concrete_term error messages" begin
    # a NamedTuple of vectors with columns `a` and `b` only
    coltable = (a = [1, 2, 3], b = [0.0, 0.5, 1.0])
    @test Tables.istable(coltable)
    # asking for a column that does not exist must raise an ArgumentError
    @test_throws ArgumentError concrete_term(term(:not_there), coltable)
end
218+
213219
end

0 commit comments

Comments
 (0)