Skip to content

Commit 368601e

Browse files
authored
Faster Corner Cases (#263)
* concrete_term(): skip the intersect call when levels(xs) and unique(xs) have the same length. Both are derived from the same source array and the observed values are a subset of the levels, so the two sets are equal whenever their lengths are equal, and there is no need to call intersect.
* missing_omit(): faster copying when there are no missing values.
1 parent d22674a commit 368601e

File tree

2 files changed

+14
-9
lines changed

2 files changed

+14
-9
lines changed

src/modelframe.jl

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,24 +50,26 @@ function _nonmissing!(res, col)
5050
res .&= .!ismissing.(col)
5151
end
5252

53-
53+
_missing_omit(x::AbstractVector{T}) where T = copyto!(similar(x, nonmissingtype(T)), x)
54+
_missing_omit(x::AbstractVector, rows) = _missing_omit(view(x, rows))
55+
5456
function missing_omit(d::T) where T<:ColumnTable
5557
nonmissings = trues(length(first(d)))
5658
for col in d
5759
_nonmissing!(nonmissings, col)
5860
end
59-
60-
rows = findall(nonmissings)
61-
d_nonmissing =
62-
NamedTuple{Tables.names(T)}(tuple((copyto!(similar(col,
63-
Base.nonmissingtype(eltype(col)),
64-
length(rows)),
65-
view(col, rows)) for col in d)...))
61+
d_nonmissing = if all(nonmissings)
62+
map(_missing_omit, d)
63+
else
64+
rows = findall(nonmissings)
65+
map(Base.Fix2(_missing_omit, rows), d)
66+
end
6667
d_nonmissing, nonmissings
6768
end
6869

6970
missing_omit(data::T, formula::AbstractTerm) where T<:ColumnTable =
7071
missing_omit(NamedTuple{tuple(termvars(formula)...)}(data))
72+
7173
function ModelFrame(f::FormulaTerm, data::ColumnTable;
7274
model::Type{M}=StatisticalModel, contrasts=Dict{Symbol,Any}()) where M
7375

src/schema.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,10 @@ concrete_term(t::Term, xs::AbstractVector, ::Nothing) = concrete_term(t, xs, Cat
206206
concrete_term(t::Term, xs::AbstractArray, ::Type{CategoricalTerm}) = concrete_term(t, xs, DummyCoding())
207207

208208
function concrete_term(t::Term, xs::AbstractArray, contrasts::AbstractContrasts)
209-
contrmat = ContrastsMatrix(contrasts, intersect(levels(xs), unique(xs)))
209+
xlevels = levels(xs)
210+
xunique = unique(xs)
211+
xused = length(xlevels) == length(xunique) ? xlevels : intersect(xlevels, xunique)
212+
contrmat = ContrastsMatrix(contrasts, xused)
210213
CategoricalTerm(t.sym, contrmat)
211214
end
212215

0 commit comments

Comments
 (0)