-
Notifications
You must be signed in to change notification settings - Fork 34
Open
Description
Calling StatsModels.fit
with a not yet seen formula seems to trigger pretty slow compilation, even if a structurally equivalent formula with different names has been seen before. Triggering fit
with a formula which has been seen before is very fast.
The below reproducing example using GLM
and DataFrames
, and closely mimics how I stumbled upon this issue in the wild. I'm not familiar with the StatsModels/GLM internals, but if this example isn't minimal enough I can try to drill down.
julia> using StatsModels, DataFrames, GLM
julia> df = DataFrame(rand(100,100));
julia> function f(df, a, b, c)
reg_form = Term(a) ~ Term(b) + Term(c)
return r2(lm(reg_form, df))
end
f (generic function with 1 method)
julia> # precompile f with arg types dataframe, symbol, symbol, symbol
julia> f(df, :x1, :x2, :x3)
0.0033857274581728936
julia> # That was really slow, around 7 secons. Future calls with the exact same symbols are fast
julia> # call f with the same arg types, but new symbol values
julia> @elapsed f(df, :x1, :x2, :x4)
0.1441765
julia> # call f again with already-seen symbol values, and it's super fast
julia> @elapsed f(df, :x1, :x2, :x4)
0.00016
julia> versioninfo()
Julia Version 1.6.0
Commit f9720dc2eb (2021-03-24 12:55 UTC)
Platform Info:
OS: Windows (x86_64-w64-mingw32)
CPU: AMD Ryzen 5 1600X Six-Core Processor
WORD_SIZE: 64
LIBM: libopenlibm
LLVM: libLLVM-11.0.1 (ORCJIT, znver1)
Environment:
JULIAPATH = C:\Users\ejfie\AppData\Local\Programs\Julia-1.6.0\bin
JULIA_NUM_THREADS = 4
(@v1.6) pkg> st StatsModels
Status `C:\Users\ejfie\.julia\environments\v1.6\Project.toml`
[3eaba693] StatsModels v0.6.21
julia> using Profile
julia> @profile begin
for i in 5:20
symb = Symbol("x$i")
f(df, :x1, :x2, symb)
end
end
julia> Profile.print(mincount=50,maxdepth=30)
Overhead ╎ [+additional indent] Count File:Line; Function
=========================================================
╎1254 @Base\client.jl:485; _start()
╎ 1254 @Base\client.jl:302; exec_options(opts::Base.JLOptions)
╎ 1254 @Base\client.jl:372; run_main_repl(interactive::Bool, quiet::Bool, banner::Bool, history_file::Bool, color_set::Bool)
╎ 1254 @Base\essentials.jl:706; invokelatest
╎ 1254 @Base\essentials.jl:708; #invokelatest#2
╎ 1254 @Base\client.jl:387; (::Base.var"#874#876"{Bool, Bool, Bool})(REPL::Module)
╎ ╎ 1254 C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:305; run_repl(repl::REPL.AbstractREPL, consumer::Any)
╎ ╎ 1254 C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:317; run_repl(repl::REPL.AbstractREPL, consumer::Any; backend_on_current_task::Bool)
╎ ╎ 1254 C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:185; start_repl_backend(backend::REPL.REPLBackend, consumer::Any)
╎ ╎ 1254 C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:200; repl_backend_loop(backend::REPL.REPLBackend)
╎ ╎ 1254 C:\buildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\REPL\src\REPL.jl:139; eval_user_input(ast::Any, backend::REPL.REPLBackend)
╎ ╎ ╎ 1254 @Base\boot.jl:360; eval
╎ ╎ ╎ 1254 ...uildbot\worker\package_win64\build\usr\share\julia\stdlib\v1.6\Profile\src\Profile.jl:28; top-level scope
╎ ╎ ╎ 1254 REPL[9]:4; macro expansion
╎ ╎ ╎ 1254 REPL[3]:3; f(df::DataFrame, a::Symbol, b::Symbol, c::Symbol)
╎ ╎ ╎ 1254 @GLM\src\lm.jl:156; lm
╎ ╎ ╎ ╎ 1254 @GLM\src\lm.jl:156; lm
╎ ╎ ╎ ╎ 1254 @GLM\src\lm.jl:156; #lm#2
╎ ╎ ╎ ╎ 1254 @StatsModels\src\statsmodel.jl:82; fit
╎ ╎ ╎ ╎ 518 @StatsModels\src\statsmodel.jl:85; fit(::Type{LinearModel}, f::FormulaTerm{Term, Tuple{Term, Term}}, data::DataFrame, args::Nothing; contrasts::Dict{Symbol, An...
╎ ╎ ╎ ╎ 518 @StatsModels\src\modelframe.jl:74; (::Core.var"#Type##kw")(::NamedTuple{(:model, :contrasts), Tuple{UnionAll, Dict{Symbol, Any}}}, ::Type{ModelFrame}, f::Formu...
╎ ╎ ╎ ╎ ╎ 166 @StatsModels\src\modelframe.jl:74; ModelFrame(f::FormulaTerm{Term, Tuple{Term, Term}}, data::NamedTuple{(:x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10, :x...
101╎ ╎ ╎ ╎ ╎ 166 @StatsModels\src\modelframe.jl:69; missing_omit(data::NamedTuple{(:x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10, :x11, :x12, :x13, :x14, :x15, :x16, :x17,...
╎ ╎ ╎ ╎ ╎ 64 @Base\compiler\typeinfer.jl:921; typeinf_ext_toplevel(mi::Core.MethodInstance, world::UInt64)
╎ ╎ ╎ ╎ ╎ 64 @Base\compiler\typeinfer.jl:925; typeinf_ext_toplevel(interp::Core.Compiler.NativeInterpreter, linfo::Core.MethodInstance)
╎ ╎ ╎ ╎ ╎ 58 @Base\compiler\typeinfer.jl:892; typeinf_ext(interp::Core.Compiler.NativeInterpreter, mi::Core.MethodInstance)
╎ ╎ ╎ ╎ ╎ ╎ 58 @Base\compiler\typeinfer.jl:209; typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)
╎ ╎ ╎ ╎ ╎ ╎ 53 @Base\compiler\typeinfer.jl:214; _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)
╎ ╎ ╎ ╎ ╎ ╎ 53 @Base\compiler\abstractinterpretation.jl:1520; typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)
221╎ ╎ ╎ ╎ ╎ 273 @StatsModels\src\modelframe.jl:76; ModelFrame(f::FormulaTerm{Term, Tuple{Term, Term}}, data::NamedTuple{(:x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10, :x...
╎ ╎ ╎ ╎ ╎ 52 @Base\compiler\typeinfer.jl:921; typeinf_ext_toplevel(mi::Core.MethodInstance, world::UInt64)
╎ ╎ ╎ ╎ ╎ 52 @Base\compiler\typeinfer.jl:925; typeinf_ext_toplevel(interp::Core.Compiler.NativeInterpreter, linfo::Core.MethodInstance)
╎ ╎ ╎ ╎ ╎ 52 @Base\compiler\typeinfer.jl:892; typeinf_ext(interp::Core.Compiler.NativeInterpreter, mi::Core.MethodInstance)
╎ ╎ ╎ ╎ ╎ 52 @Base\compiler\typeinfer.jl:209; typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)
╎ ╎ ╎ ╎ ╎ ╎ 51 @Base\compiler\typeinfer.jl:214; _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)
╎ ╎ ╎ ╎ ╎ ╎ 51 @Base\compiler\abstractinterpretation.jl:1520; typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)
╎ ╎ ╎ ╎ ╎ ╎ 51 @Base\compiler\abstractinterpretation.jl:1462; typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)
╎ ╎ ╎ ╎ ╎ ╎ 51 @Base\compiler\abstractinterpretation.jl:1167; abstract_eval_statement(interp::Core.Compiler.NativeInterpreter, e::Any, vtypes::Vector{Any}, sv::Core.Compiler.Infer...
╎ ╎ ╎ ╎ ╎ ╎ 51 @Base\compiler\abstractinterpretation.jl:1040; abstract_call(interp::Core.Compiler.NativeInterpreter, fargs::Vector{Any}, argtypes::Vector{Any}, sv::Core.Compiler.I...
70╎ ╎ ╎ ╎ ╎ 79 @StatsModels\src\modelframe.jl:79; ModelFrame(f::FormulaTerm{Term, Tuple{Term, Term}}, data::NamedTuple{(:x1, :x2, :x3, :x4, :x5, :x6, :x7, :x8, :x9, :x10, :x...
85╎ ╎ ╎ ╎ 625 @StatsModels\src\statsmodel.jl:86; fit(::Type{LinearModel}, f::FormulaTerm{Term, Tuple{Term, Term}}, data::DataFrame, args::Nothing; contrasts::Dict{Symbol, An...
╎ ╎ ╎ ╎ 522 @StatsModels\src\modelframe.jl:222; ModelMatrix(mf::ModelFrame{NamedTuple{(:x1, :x2, :x5), Tuple{Vector{Float64}, Vector{Float64}, Vector{Float64}}}, LinearMod...
╎ ╎ ╎ ╎ ╎ 522 @StatsModels\src\modelframe.jl:218; ModelMatrix{Matrix{Float64}}(mf::ModelFrame{NamedTuple{(:x1, :x2, :x5), Tuple{Vector{Float64}, Vector{Float64}, Vector{Floa...
╎ ╎ ╎ ╎ ╎ 522 @StatsModels\src\modelframe.jl:147; modelmatrix
432╎ ╎ ╎ ╎ ╎ 522 @StatsModels\src\modelframe.jl:147; modelmatrix(mf::ModelFrame{NamedTuple{(:x1, :x2, :x5), Tuple{Vector{Float64}, Vector{Float64}, Vector{Float64}}}, LinearM...
╎ ╎ ╎ ╎ ╎ 90 @Base\compiler\typeinfer.jl:921; typeinf_ext_toplevel(mi::Core.MethodInstance, world::UInt64)
╎ ╎ ╎ ╎ ╎ 90 @Base\compiler\typeinfer.jl:925; typeinf_ext_toplevel(interp::Core.Compiler.NativeInterpreter, linfo::Core.MethodInstance)
╎ ╎ ╎ ╎ ╎ ╎ 90 @Base\compiler\typeinfer.jl:892; typeinf_ext(interp::Core.Compiler.NativeInterpreter, mi::Core.MethodInstance)
╎ ╎ ╎ ╎ ╎ ╎ 90 @Base\compiler\typeinfer.jl:209; typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)
╎ ╎ ╎ ╎ ╎ ╎ 89 @Base\compiler\typeinfer.jl:214; _typeinf(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)
╎ ╎ ╎ ╎ ╎ ╎ 89 @Base\compiler\abstractinterpretation.jl:1520; typeinf_nocycle(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)
╎ ╎ ╎ ╎ ╎ ╎ 89 @Base\compiler\abstractinterpretation.jl:1462; typeinf_local(interp::Core.Compiler.NativeInterpreter, frame::Core.Compiler.InferenceState)
62╎ ╎ ╎ ╎ 65 @StatsModels\src\statsmodel.jl:87; fit(::Type{LinearModel}, f::FormulaTerm{Term, Tuple{Term, Term}}, data::DataFrame, args::Nothing; contrasts::Dict{Symbol, An...
Total snapshots: 1269
Metadata
Metadata
Assignees
Labels
No labels