Skip to content

Commit 323b558

Browse files
committed
Update @snoop_inference for Julia 1.12 (#419)
This leverages the new timing fields of CodeInstance to replace the old Core.Compiler.Timings module. There is a bit of loss particularly in the domain of constant-propagation, which is illustrated in https://gist.github.com/timholy/9a64b27c1932bb414e69b8fe48284b5e. (Read testcase.jl before looking at the results.) As a consequence there are a small handful of failing tests locally, but the vast majority pass.
1 parent c74b54e commit 323b558

File tree

15 files changed

+377
-286
lines changed

15 files changed

+377
-286
lines changed

.github/workflows/Documenter.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
- uses: actions/checkout@v5
2424
- uses: julia-actions/setup-julia@latest
2525
with:
26-
version: '1'
26+
version: 'min'
2727
- run: julia --project -e 'using Pkg; Pkg.develop([PackageSpec(path=joinpath(pwd(), "SnoopCompileCore"))])'
2828
- uses: julia-actions/julia-buildpkg@latest
2929
# To access the developer tools from within a package's environment, they should be in the default environment

.github/workflows/ci.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,11 @@ jobs:
4646
# - run: julia --project -e 'using Pkg; Pkg.develop([PackageSpec(path="SnoopCompileCore")])'
4747
- uses: julia-actions/julia-buildpkg@latest
4848
- uses: julia-actions/julia-runtest@latest
49+
with:
50+
coverage: false # FIXME: this is very sad, but coverage changes the snoop_inference.jl/Stale tests (as of Julia 1.13.0-DEV.1058)
4951
- run: julia --check-bounds=yes --project -e 'using Pkg; Pkg.test(; test_args=["cthulhu"], coverage=true)'
52+
- run: julia --check-bounds=yes --project -e 'using Pkg; Pkg.test(; test_args=["jet"], coverage=true)'
53+
continue-on-error: true # JET test is non-fatal
5054
- uses: julia-actions/julia-processcoverage@v1
5155
with:
5256
directories: src,SnoopCompileCore/src

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ author = ["Tim Holy <tim.holy@gmail.com>", "Shuhei Kadowaki <aviatesk@gmail.com>
55

66
[deps]
77
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
8+
CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
89
FlameGraphs = "08572546-2f56-4bcf-ba4e-bab62c3a3f89"
910
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
1011
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
@@ -31,6 +32,7 @@ SCPyPlotExt = "PyPlot"
3132

3233
[compat]
3334
AbstractTrees = "0.4"
35+
CodeTracking = "1.3.9, 2"
3436
Cthulhu = "2"
3537
FlameGraphs = "1"
3638
InteractiveUtils = "1"

SnoopCompileCore/src/SnoopCompileCore.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
module SnoopCompileCore
22

3-
using Core: MethodInstance, CodeInfo
3+
using Core: MethodInstance, CodeInstance, CodeInfo
44

55
const ReinferUtils = isdefined(Base, :ReinferUtils) ? Base.ReinferUtils : Base.StaticData
66

Lines changed: 147 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,100 +1,25 @@
11
export @snoop_inference
22

3-
struct InferenceTiming
4-
mi_info::Core.Compiler.Timings.InferenceFrameInfo
5-
inclusive_time::Float64
6-
exclusive_time::Float64
3+
const snoop_inference_lock = ReentrantLock()
4+
const newly_inferred = CodeInstance[]
5+
const inference_entrance_backtraces = []
6+
7+
function start_tracking()
8+
iszero(snoop_inference_lock.reentrancy_cnt) || throw(ConcurrencyViolationError("already tracking inference (cannot nest `@snoop_inference` blocks)"))
9+
lock(snoop_inference_lock)
10+
empty!(newly_inferred)
11+
empty!(inference_entrance_backtraces)
12+
ccall(:jl_set_newly_inferred, Cvoid, (Any,), newly_inferred)
13+
ccall(:jl_set_inference_entrance_backtraces, Cvoid, (Any,), inference_entrance_backtraces)
14+
return nothing
715
end
8-
"""
9-
inclusive(frame)
10-
11-
Return the time spent inferring `frame` and its callees.
12-
"""
13-
inclusive(it::InferenceTiming) = it.inclusive_time
14-
"""
15-
exclusive(frame)
16-
17-
Return the time spent inferring `frame`, not including the time needed for any of its callees.
18-
"""
19-
exclusive(it::InferenceTiming) = it.exclusive_time
2016

21-
struct InferenceTimingNode
22-
mi_timing::InferenceTiming
23-
start_time::Float64
24-
children::Vector{InferenceTimingNode}
25-
bt
26-
parent::InferenceTimingNode
27-
28-
# Root constructor
29-
InferenceTimingNode(mi_timing::InferenceTiming, start_time, @nospecialize(bt)) =
30-
new(mi_timing, start_time, InferenceTimingNode[], bt)
31-
# Child constructor
32-
function InferenceTimingNode(mi_timing::InferenceTiming, start_time, @nospecialize(bt), parent::InferenceTimingNode)
33-
child = new(mi_timing, start_time, InferenceTimingNode[], bt, parent)
34-
push!(parent.children, child)
35-
return child
36-
end
37-
end
38-
inclusive(node::InferenceTimingNode) = inclusive(node.mi_timing)
39-
exclusive(node::InferenceTimingNode) = exclusive(node.mi_timing)
40-
InferenceTiming(node::InferenceTimingNode) = node.mi_timing
41-
42-
function InferenceTimingNode(t::Core.Compiler.Timings.Timing)
43-
ttree = timingtree(t)
44-
it, start_time, ttree_children = ttree::Tuple{InferenceTiming, Float64, Vector{Any}}
45-
root = InferenceTimingNode(it, start_time, t.bt)
46-
addchildren!(root, t, ttree_children)
47-
return root
48-
end
49-
50-
# Compute inclusive times and store as a temporary tree.
51-
# To allow InferenceTimingNode to be both bidirectional and immutable, we need to create parent node before the child nodes.
52-
# However, each node stores its inclusive time, which can only be computed efficiently from the leaves up (children before parents).
53-
# This performs the inclusive-time computation, storing the result as a "temporary tree" that can be used during
54-
# InferenceTimingNode creation (see `addchildren!`).
55-
function timingtree(t::Core.Compiler.Timings.Timing)
56-
time, start_time = t.time/10^9, t.start_time/10^9
57-
incl_time = time
58-
tchildren = []
59-
for child in t.children
60-
tchild = timingtree(child)
61-
push!(tchildren, tchild)
62-
incl_time += inclusive(tchild[1])
63-
end
64-
return (InferenceTiming(t.mi_info, incl_time, time), start_time, tchildren)
65-
end
66-
67-
function addchildren!(parent::InferenceTimingNode, t::Core.Compiler.Timings.Timing, ttrees)
68-
for (child, ttree) in zip(t.children, ttrees)
69-
it, start_time, ttree_children = ttree::Tuple{InferenceTiming, Float64, Vector{Any}}
70-
node = InferenceTimingNode(it, start_time, child.bt, parent)
71-
addchildren!(node, child, ttree_children)
72-
end
73-
end
74-
75-
function start_deep_timing()
76-
Core.Compiler.Timings.reset_timings()
77-
Core.Compiler.__set_measure_typeinf(true)
78-
end
79-
function stop_deep_timing()
80-
Core.Compiler.__set_measure_typeinf(false)
81-
Core.Compiler.Timings.close_current_timer()
82-
end
83-
84-
function finish_snoop_inference()
85-
return InferenceTimingNode(Core.Compiler.Timings._timings[1])
86-
end
87-
88-
function _snoop_inference(cmd::Expr)
89-
return quote
90-
start_deep_timing()
91-
try
92-
$(esc(cmd))
93-
finally
94-
stop_deep_timing()
95-
end
96-
finish_snoop_inference()
97-
end
17+
function stop_tracking()
18+
Base.assert_havelock(snoop_inference_lock)
19+
ccall(:jl_set_newly_inferred, Cvoid, (Any,), nothing)
20+
ccall(:jl_set_inference_entrance_backtraces, Cvoid, (Any,), nothing)
21+
unlock(snoop_inference_lock)
22+
return nothing
9823
end
9924

10025
"""
@@ -134,11 +59,134 @@ julia> tinf = @snoop_inference begin
13459
```
13560
"""
13661
macro snoop_inference(cmd)
137-
return _snoop_inference(cmd)
62+
return esc(quote
63+
local backtrace_log = $(SnoopCompileCore.start_tracking)()
64+
try
65+
$cmd
66+
finally
67+
$(SnoopCompileCore.stop_tracking)()
68+
end
69+
$timingtree($(SnoopCompileCore.newly_inferred), copy($(SnoopCompileCore.inference_entrance_backtraces)))
70+
end)
71+
end
72+
73+
struct InferenceTimingNode
74+
ci::CodeInstance
75+
children::Vector{InferenceTimingNode}
76+
bt
77+
parent::InferenceTimingNode
78+
79+
function InferenceTimingNode(ci::CodeInstance, st) # for creating the root
80+
return new(ci, InferenceTimingNode[], st)
81+
end
82+
function InferenceTimingNode(ci::CodeInstance, st, parent)
83+
child = new(ci, InferenceTimingNode[], st, parent)
84+
push!(parent.children, child)
85+
return child
86+
end
87+
end
88+
89+
function timingtree(cis, _backtraces::Vector{Any})
90+
root = InferenceTimingNode(Core.Compiler.Timings.ROOTmi.cache, nothing)
91+
# the cis are added in the order children-before-parents, we need to be able to reverse that
92+
# We index on MethodInstance rather than CodeInstance, because constprop can result in a distinct
93+
# (and uncached) CodeInstance for the same MethodInstance
94+
miidx = Dict([methodinstance(ci) for ci in cis] .=> eachindex(cis))
95+
backedges = [Int[] for _ in eachindex(cis)]
96+
for (i, ci) in pairs(cis)
97+
for e in ci.edges
98+
e isa CodeInstance || continue
99+
eidx = get(miidx, methodinstance(e), nothing)
100+
if eidx !== nothing
101+
push!(backedges[eidx], i)
102+
end
103+
end
104+
end
105+
backtraces = Dict{CodeInstance,Vector{Union{Ptr{Nothing}, Base.InterpreterIP}}}()
106+
for i = 1:2:length(_backtraces)
107+
ci, trace = _backtraces[i], _backtraces[i+1]
108+
bt = Base._reformat_bt(trace[1], trace[2])
109+
backtraces[ci] = bt
110+
end
111+
addchildren!(root, cis, backedges, miidx, backtraces)
112+
return root
113+
end
114+
115+
function addchildren!(parent::InferenceTimingNode, handled::Set{CodeInstance}, miidx)
116+
for ci in parent.ci.edges
117+
ci isa CodeInstance || continue
118+
haskey(miidx, methodinstance(ci)) || continue
119+
ci ∈ handled && continue
120+
child = InferenceTimingNode(ci, nothing, parent)
121+
push!(handled, ci)
122+
addchildren!(child, handled, miidx)
123+
end
124+
return parent
138125
end
139126

140-
# These are okay to come at the top-level because we're only measuring inference, and
141-
# inference results will be cached in a `.ji` file.
142-
precompile(start_deep_timing, ())
143-
precompile(stop_deep_timing, ())
144-
precompile(finish_snoop_inference, ())
127+
function addchildren!(parent::InferenceTimingNode, cis, backedges, miidx, backtraces)
128+
handled = Set{CodeInstance}()
129+
for (i, ci) in pairs(cis)
130+
ci ∈ handled && continue
131+
# Follow the backedges to the root
132+
j = i
133+
be = ci
134+
while true
135+
found = false
136+
for k in backedges[j]
137+
be = cis[k]
138+
if be ∉ handled
139+
j = k
140+
found = true
141+
break
142+
end
143+
end
144+
found || break
145+
end
146+
be ∈ handled && continue
147+
# bt1, bt2 = get(backtraces, Core.Compiler.get_ci_mi(be), (nothing, nothing))
148+
# child = InferenceTimingNode(be, make_stacktrace(bt1, bt2), parent)
149+
child = InferenceTimingNode(be, get(backtraces, be, nothing), parent)
150+
push!(handled, be)
151+
addchildren!(child, handled, miidx)
152+
end
153+
return parent
154+
end
155+
156+
methodinstance(ci::CodeInstance) = Core.Compiler.get_ci_mi(ci)
157+
158+
# make_stacktrace(bt1::Vector{Ptr{Cvoid}}, bt2::Vector{Any}) = Base._reformat_bt(bt1, bt2)
159+
# make_stacktrace(::Nothing, ::Nothing) = nothing
160+
161+
## API functions
162+
163+
"""
164+
inclusive(ci::InferenceTimingNode; include_llvm::Bool=true)
165+
166+
Return the time spent inferring `ci` and its callees.
167+
If `include_llvm` is true, the LLVM compilation time is added as well.
168+
"""
169+
inclusive(ci::CodeInstance; include_llvm::Bool=true) = Float64(reinterpret(Float16, ci.time_infer_total)) +
170+
include_llvm * Float64(reinterpret(Float16, ci.time_compile))
171+
function inclusive(node::InferenceTimingNode; kwargs...)
172+
t = inclusive(node.ci; kwargs...)
173+
for child in node.children
174+
t += inclusive(child; kwargs...)
175+
end
176+
return t
177+
end
178+
179+
"""
180+
exclusive(ci::InferenceTimingNode; include_llvm::Bool=true)
181+
182+
Return the time spent inferring `ci`, not including the time needed for any of its callees.
183+
If `include_llvm` is true, the LLVM compilation time is added.
184+
"""
185+
exclusive(ci::CodeInstance; include_llvm::Bool=true) = Float64(reinterpret(Float16, ci.time_infer_self)) +
186+
include_llvm * Float64(reinterpret(Float16, ci.time_compile))
187+
exclusive(node::InferenceTimingNode; kwargs...) = exclusive(node.ci; kwargs...)
188+
189+
190+
precompile(start_tracking, ())
191+
precompile(stop_tracking, ())
192+
precompile(timingtree, (Vector{CodeInstance}, Vector{Any}))

docs/src/tutorials/invalidations.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,9 @@ tree = trees[1]
156156

157157
Each tree stems from a single *cause* described in the top line. For this tree, the cause was adding the new method `score(::Char)` in `BlackjackFacecards`.
158158

159+
!!! note
160+
A tree with no cause indicates that the cause occurred before you turned on snooping.
161+
159162
Each *cause* is associated with one or more *victims* of invalidation, a list here named `mt_backedges`. Let's extract the final (and in this case, only) victim:
160163

161164
```@repl tutorial-invalidations

src/SnoopCompile.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ using InteractiveUtils
4141
using Serialization
4242
using Printf
4343
using OrderedCollections
44+
using CodeTracking
4445
import YAML # For @snoop_llvm
4546

4647
using Base: specializations
@@ -73,6 +74,7 @@ end
7374

7475
include("parcel_snoop_inference.jl")
7576
include("inference_demos.jl")
77+
export InferenceTiming, InferenceTimingNode
7678
export exclusive, inclusive, flamegraph, flatten, accumulate_by_source, collect_for, runtime_inferencetime, staleinstances
7779
export InferenceTrigger, inference_triggers, callerinstance, callingframe, skiphigherorder, trigger_tree, suggest, isignorable
7880
export report_callee, report_caller, report_callees
@@ -83,8 +85,8 @@ export read_snoop_llvm
8385
include("invalidations.jl")
8486
export uinvalidated, invalidation_trees, filtermod, findcaller
8587

86-
# include("invalidation_and_inference.jl")
87-
# export precompile_blockers
88+
include("invalidation_and_inference.jl")
89+
export precompile_blockers
8890

8991
# Write
9092
include("write.jl")

0 commit comments

Comments
 (0)