|
1 | 1 | export @snoop_inference |
2 | 2 |
|
3 | | -struct InferenceTiming |
4 | | - mi_info::Core.Compiler.Timings.InferenceFrameInfo |
5 | | - inclusive_time::Float64 |
6 | | - exclusive_time::Float64 |
| 3 | +const snoop_inference_lock = ReentrantLock() |
| 4 | +const newly_inferred = CodeInstance[] |
| 5 | +const inference_entrance_backtraces = [] |
| 6 | + |
| 7 | +function start_tracking() |
| 8 | + iszero(snoop_inference_lock.reentrancy_cnt) || throw(ConcurrencyViolationError("already tracking inference (cannot nest `@snoop_inference` blocks)")) |
| 9 | + lock(snoop_inference_lock) |
| 10 | + empty!(newly_inferred) |
| 11 | + empty!(inference_entrance_backtraces) |
| 12 | + ccall(:jl_set_newly_inferred, Cvoid, (Any,), newly_inferred) |
| 13 | + ccall(:jl_set_inference_entrance_backtraces, Cvoid, (Any,), inference_entrance_backtraces) |
| 14 | + return nothing |
7 | 15 | end |
8 | | -""" |
9 | | - inclusive(frame) |
10 | | -
|
11 | | -Return the time spent inferring `frame` and its callees. |
12 | | -""" |
13 | | -inclusive(it::InferenceTiming) = it.inclusive_time |
14 | | -""" |
15 | | - exclusive(frame) |
16 | | -
|
17 | | -Return the time spent inferring `frame`, not including the time needed for any of its callees. |
18 | | -""" |
19 | | -exclusive(it::InferenceTiming) = it.exclusive_time |
20 | 16 |
|
21 | | -struct InferenceTimingNode |
22 | | - mi_timing::InferenceTiming |
23 | | - start_time::Float64 |
24 | | - children::Vector{InferenceTimingNode} |
25 | | - bt |
26 | | - parent::InferenceTimingNode |
27 | | - |
28 | | - # Root constructor |
29 | | - InferenceTimingNode(mi_timing::InferenceTiming, start_time, @nospecialize(bt)) = |
30 | | - new(mi_timing, start_time, InferenceTimingNode[], bt) |
31 | | - # Child constructor |
32 | | - function InferenceTimingNode(mi_timing::InferenceTiming, start_time, @nospecialize(bt), parent::InferenceTimingNode) |
33 | | - child = new(mi_timing, start_time, InferenceTimingNode[], bt, parent) |
34 | | - push!(parent.children, child) |
35 | | - return child |
36 | | - end |
37 | | -end |
38 | | -inclusive(node::InferenceTimingNode) = inclusive(node.mi_timing) |
39 | | -exclusive(node::InferenceTimingNode) = exclusive(node.mi_timing) |
40 | | -InferenceTiming(node::InferenceTimingNode) = node.mi_timing |
41 | | - |
42 | | -function InferenceTimingNode(t::Core.Compiler.Timings.Timing) |
43 | | - ttree = timingtree(t) |
44 | | - it, start_time, ttree_children = ttree::Tuple{InferenceTiming, Float64, Vector{Any}} |
45 | | - root = InferenceTimingNode(it, start_time, t.bt) |
46 | | - addchildren!(root, t, ttree_children) |
47 | | - return root |
48 | | -end |
49 | | - |
50 | | -# Compute inclusive times and store as a temporary tree. |
51 | | -# To allow InferenceTimingNode to be both bidirectional and immutable, we need to create parent node before the child nodes. |
52 | | -# However, each node stores its inclusive time, which can only be computed efficiently from the leaves up (children before parents). |
53 | | -# This performs the inclusive-time computation, storing the result as a "temporary tree" that can be used during |
54 | | -# InferenceTimingNode creation (see `addchildren!`). |
55 | | -function timingtree(t::Core.Compiler.Timings.Timing) |
56 | | - time, start_time = t.time/10^9, t.start_time/10^9 |
57 | | - incl_time = time |
58 | | - tchildren = [] |
59 | | - for child in t.children |
60 | | - tchild = timingtree(child) |
61 | | - push!(tchildren, tchild) |
62 | | - incl_time += inclusive(tchild[1]) |
63 | | - end |
64 | | - return (InferenceTiming(t.mi_info, incl_time, time), start_time, tchildren) |
65 | | -end |
66 | | - |
67 | | -function addchildren!(parent::InferenceTimingNode, t::Core.Compiler.Timings.Timing, ttrees) |
68 | | - for (child, ttree) in zip(t.children, ttrees) |
69 | | - it, start_time, ttree_children = ttree::Tuple{InferenceTiming, Float64, Vector{Any}} |
70 | | - node = InferenceTimingNode(it, start_time, child.bt, parent) |
71 | | - addchildren!(node, child, ttree_children) |
72 | | - end |
73 | | -end |
74 | | - |
75 | | -function start_deep_timing() |
76 | | - Core.Compiler.Timings.reset_timings() |
77 | | - Core.Compiler.__set_measure_typeinf(true) |
78 | | -end |
79 | | -function stop_deep_timing() |
80 | | - Core.Compiler.__set_measure_typeinf(false) |
81 | | - Core.Compiler.Timings.close_current_timer() |
82 | | -end |
83 | | - |
84 | | -function finish_snoop_inference() |
85 | | - return InferenceTimingNode(Core.Compiler.Timings._timings[1]) |
86 | | -end |
87 | | - |
88 | | -function _snoop_inference(cmd::Expr) |
89 | | - return quote |
90 | | - start_deep_timing() |
91 | | - try |
92 | | - $(esc(cmd)) |
93 | | - finally |
94 | | - stop_deep_timing() |
95 | | - end |
96 | | - finish_snoop_inference() |
97 | | - end |
| 17 | +function stop_tracking() |
| 18 | + Base.assert_havelock(snoop_inference_lock) |
| 19 | + ccall(:jl_set_newly_inferred, Cvoid, (Any,), nothing) |
| 20 | + ccall(:jl_set_inference_entrance_backtraces, Cvoid, (Any,), nothing) |
| 21 | + unlock(snoop_inference_lock) |
| 22 | + return nothing |
98 | 23 | end |
99 | 24 |
|
100 | 25 | """ |
@@ -134,11 +59,134 @@ julia> tinf = @snoop_inference begin |
134 | 59 | ``` |
135 | 60 | """ |
136 | 61 | macro snoop_inference(cmd) |
137 | | - return _snoop_inference(cmd) |
| 62 | + return esc(quote |
| 63 | + local backtrace_log = $(SnoopCompileCore.start_tracking)() |
| 64 | + try |
| 65 | + $cmd |
| 66 | + finally |
| 67 | + $(SnoopCompileCore.stop_tracking)() |
| 68 | + end |
| 69 | + $timingtree($(SnoopCompileCore.newly_inferred), copy($(SnoopCompileCore.inference_entrance_backtraces))) |
| 70 | + end) |
| 71 | +end |
| 72 | + |
| 73 | +struct InferenceTimingNode |
| 74 | + ci::CodeInstance |
| 75 | + children::Vector{InferenceTimingNode} |
| 76 | + bt |
| 77 | + parent::InferenceTimingNode |
| 78 | + |
| 79 | + function InferenceTimingNode(ci::CodeInstance, st) # for creating the root |
| 80 | + return new(ci, InferenceTimingNode[], st) |
| 81 | + end |
| 82 | + function InferenceTimingNode(ci::CodeInstance, st, parent) |
| 83 | + child = new(ci, InferenceTimingNode[], st, parent) |
| 84 | + push!(parent.children, child) |
| 85 | + return child |
| 86 | + end |
| 87 | +end |
| 88 | + |
| 89 | +function timingtree(cis, _backtraces::Vector{Any}) |
| 90 | + root = InferenceTimingNode(Core.Compiler.Timings.ROOTmi.cache, nothing) |
| 91 | + # the cis are added in the order children-before-parents, we need to be able to reverse that |
| 92 | + # We index on MethodInstance rather than CodeInstance, because constprop can result in a distinct |
| 93 | + # (and uncached) CodeInstance for the same MethodInstance |
| 94 | + miidx = Dict([methodinstance(ci) for ci in cis] .=> eachindex(cis)) |
| 95 | + backedges = [Int[] for _ in eachindex(cis)] |
| 96 | + for (i, ci) in pairs(cis) |
| 97 | + for e in ci.edges |
| 98 | + e isa CodeInstance || continue |
| 99 | + eidx = get(miidx, methodinstance(e), nothing) |
| 100 | + if eidx !== nothing |
| 101 | + push!(backedges[eidx], i) |
| 102 | + end |
| 103 | + end |
| 104 | + end |
| 105 | + backtraces = Dict{CodeInstance,Vector{Union{Ptr{Nothing}, Base.InterpreterIP}}}() |
| 106 | + for i = 1:2:length(_backtraces) |
| 107 | + ci, trace = _backtraces[i], _backtraces[i+1] |
| 108 | + bt = Base._reformat_bt(trace[1], trace[2]) |
| 109 | + backtraces[ci] = bt |
| 110 | + end |
| 111 | + addchildren!(root, cis, backedges, miidx, backtraces) |
| 112 | + return root |
| 113 | +end |
| 114 | + |
| 115 | +function addchildren!(parent::InferenceTimingNode, handled::Set{CodeInstance}, miidx) |
| 116 | + for ci in parent.ci.edges |
| 117 | + ci isa CodeInstance || continue |
| 118 | + haskey(miidx, methodinstance(ci)) || continue |
| 119 | + ci ∈ handled && continue |
| 120 | + child = InferenceTimingNode(ci, nothing, parent) |
| 121 | + push!(handled, ci) |
| 122 | + addchildren!(child, handled, miidx) |
| 123 | + end |
| 124 | + return parent |
138 | 125 | end |
139 | 126 |
|
140 | | -# These are okay to come at the top-level because we're only measuring inference, and |
141 | | -# inference results will be cached in a `.ji` file. |
142 | | -precompile(start_deep_timing, ()) |
143 | | -precompile(stop_deep_timing, ()) |
144 | | -precompile(finish_snoop_inference, ()) |
| 127 | +function addchildren!(parent::InferenceTimingNode, cis, backedges, miidx, backtraces) |
| 128 | + handled = Set{CodeInstance}() |
| 129 | + for (i, ci) in pairs(cis) |
| 130 | + ci ∈ handled && continue |
| 131 | + # Follow the backedges to the root |
| 132 | + j = i |
| 133 | + be = ci |
| 134 | + while true |
| 135 | + found = false |
| 136 | + for k in backedges[j] |
| 137 | + be = cis[k] |
| 138 | + if be ∉ handled |
| 139 | + j = k |
| 140 | + found = true |
| 141 | + break |
| 142 | + end |
| 143 | + end |
| 144 | + found || break |
| 145 | + end |
| 146 | + be ∈ handled && continue |
| 147 | + # bt1, bt2 = get(backtraces, Core.Compiler.get_ci_mi(be), (nothing, nothing)) |
| 148 | + # child = InferenceTimingNode(be, make_stacktrace(bt1, bt2), parent) |
| 149 | + child = InferenceTimingNode(be, get(backtraces, be, nothing), parent) |
| 150 | + push!(handled, be) |
| 151 | + addchildren!(child, handled, miidx) |
| 152 | + end |
| 153 | + return parent |
| 154 | +end |
| 155 | + |
| 156 | +methodinstance(ci::CodeInstance) = Core.Compiler.get_ci_mi(ci) |
| 157 | + |
| 158 | +# make_stacktrace(bt1::Vector{Ptr{Cvoid}}, bt2::Vector{Any}) = Base._reformat_bt(bt1, bt2) |
| 159 | +# make_stacktrace(::Nothing, ::Nothing) = nothing |
| 160 | + |
| 161 | +## API functions |
| 162 | + |
| 163 | +""" |
| 164 | + inclusive(ci::InferenceTimingNode; include_llvm::Bool=true) |
| 165 | +
|
| 166 | +Return the time spent inferring `ci` and its callees. |
| 167 | +If `include_llvm` is true, the LLVM compilation time is added as well. |
| 168 | +""" |
| 169 | +inclusive(ci::CodeInstance; include_llvm::Bool=true) = Float64(reinterpret(Float16, ci.time_infer_total)) + |
| 170 | + include_llvm * Float64(reinterpret(Float16, ci.time_compile)) |
| 171 | +function inclusive(node::InferenceTimingNode; kwargs...) |
| 172 | + t = inclusive(node.ci; kwargs...) |
| 173 | + for child in node.children |
| 174 | + t += inclusive(child; kwargs...) |
| 175 | + end |
| 176 | + return t |
| 177 | +end |
| 178 | + |
| 179 | +""" |
| 180 | + exclusive(ci::InferenceTimingNode; include_llvm::Bool=true) |
| 181 | +
|
| 182 | +Return the time spent inferring `ci`, not including the time needed for any of its callees. |
| 183 | +If `include_llvm` is true, the LLVM compilation time is added. |
| 184 | +""" |
| 185 | +exclusive(ci::CodeInstance; include_llvm::Bool=true) = Float64(reinterpret(Float16, ci.time_infer_self)) + |
| 186 | + include_llvm * Float64(reinterpret(Float16, ci.time_compile)) |
| 187 | +exclusive(node::InferenceTimingNode; kwargs...) = exclusive(node.ci; kwargs...) |
| 188 | + |
| 189 | + |
| 190 | +precompile(start_tracking, ()) |
| 191 | +precompile(stop_tracking, ()) |
| 192 | +precompile(timingtree, (Vector{CodeInstance}, Vector{Any})) |
0 commit comments