@@ -12,7 +12,7 @@ module Timings
1212
1313using .. Core
1414using .. Compiler: - , + , :, Vector, length, first, empty!, push!, pop!, @inline ,
15- @inbounds , copy, backtrace
15+ @inbounds , copy, backtrace, _time_ns
1616
1717# What we record for any given frame we infer during type inference.
1818struct InferenceFrameInfo
5353Timing (mi_info, start_time, cur_start_time, time, children) = Timing (mi_info, start_time, cur_start_time, time, children, nothing )
5454Timing (mi_info, start_time) = Timing (mi_info, start_time, start_time, UInt64 (0 ), Timing[])
5555
56- _time_ns () = ccall (:jl_hrtime , UInt64, ())
57-
5856# We keep a stack of the Timings for each of the MethodInstances currently being timed.
5957# Since type inference currently operates via a depth-first search (during abstract
6058# evaluation), this vector operates like a call stack. The last node in _timings is the
@@ -103,7 +101,7 @@ function result_edges(interp::AbstractInterpreter, caller::InferenceState)
103101 end
104102end
105103
106- function finish! (interp:: AbstractInterpreter , caller:: InferenceState , validation_world:: UInt )
104+ function finish! (interp:: AbstractInterpreter , caller:: InferenceState , validation_world:: UInt , time_before :: UInt64 )
107105 result = caller. result
108106 # @assert last(result.valid_worlds) <= get_world_counter() || isempty(caller.edges)
109107 if isdefined (result, :ci )
@@ -142,9 +140,12 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation
142140 if ! @isdefined di
143141 di = DebugInfo (result. linfo)
144142 end
145- ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
143+ time_now = _time_ns ()
144+ time_self_ns = caller. time_self_ns + (time_now - time_before)
145+ time_total = (time_now - caller. time_start - caller. time_paused) * 1e-9
146+ ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
146147 ci, inferred_result, const_flag, first (result. valid_worlds), last (result. valid_worlds), encode_effects (result. ipo_effects),
147- result. analysis_results, di, edges)
148+ result. analysis_results, time_total, caller . time_caches, time_self_ns * 1e-9 , di, edges)
148149 engine_reject (interp, ci)
149150 codegen = codegen_cache (interp)
150151 if ! discard_src && codegen != = nothing && uncompressed isa CodeInfo
@@ -186,8 +187,8 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
186187 end
187188 ccall (:jl_fill_codeinst , Cvoid, (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
188189 ci, rettype, exctype, nothing , const_flags, min_world, max_world, ipo_effects, nothing , di, edges)
189- ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Any, Any),
190- ci, nothing , const_flag, min_world, max_world, ipo_effects, nothing , di, edges)
190+ ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
191+ ci, nothing , const_flag, min_world, max_world, ipo_effects, nothing , 0.0 , 0.0 , 0.0 , di, edges)
191192 code_cache (interp)[mi] = ci
192193 codegen = codegen_cache (interp)
193194 if codegen != = nothing
@@ -197,14 +198,14 @@ function finish!(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInstan
197198 return nothing
198199end
199200
200- function finish_nocycle (:: AbstractInterpreter , frame:: InferenceState )
201+ function finish_nocycle (:: AbstractInterpreter , frame:: InferenceState , time_before :: UInt64 )
201202 finishinfer! (frame, frame. interp, frame. cycleid)
202203 opt = frame. result. src
203204 if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
204205 optimize (frame. interp, opt, frame. result)
205206 end
206207 validation_world = get_world_counter ()
207- finish! (frame. interp, frame, validation_world)
208+ finish! (frame. interp, frame, validation_world, time_before )
208209 if isdefined (frame. result, :ci )
209210 # After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
210211 # (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
@@ -219,7 +220,7 @@ function finish_nocycle(::AbstractInterpreter, frame::InferenceState)
219220 return nothing
220221end
221222
222- function finish_cycle (:: AbstractInterpreter , frames:: Vector{AbsIntState} , cycleid:: Int )
223+ function finish_cycle (:: AbstractInterpreter , frames:: Vector{AbsIntState} , cycleid:: Int , time_before :: UInt64 )
223224 cycle_valid_worlds = WorldRange ()
224225 cycle_valid_effects = EFFECTS_TOTAL
225226 for frameid = cycleid: length (frames)
@@ -236,23 +237,45 @@ function finish_cycle(::AbstractInterpreter, frames::Vector{AbsIntState}, cyclei
236237 caller = frames[frameid]:: InferenceState
237238 adjust_cycle_frame! (caller, cycle_valid_worlds, cycle_valid_effects)
238239 finishinfer! (caller, caller. interp, cycleid)
240+ time_now = _time_ns ()
241+ caller. time_self_ns += (time_now - time_before)
242+ time_before = time_now
239243 end
244+ time_caches = 0.0 # the total and adjusted time of every entry in the cycle are the same
245+ time_paused = UInt64 (0 )
240246 for frameid = cycleid: length (frames)
241247 caller = frames[frameid]:: InferenceState
242248 opt = caller. result. src
243249 if opt isa OptimizationState # implies `may_optimize(caller.interp) === true`
244250 optimize (caller. interp, opt, caller. result)
251+ time_now = _time_ns ()
252+ caller. time_self_ns += (time_now - time_before)
253+ time_before = time_now
245254 end
255+ time_caches += caller. time_caches
256+ time_paused += caller. time_paused
257+ caller. time_paused = UInt64 (0 )
258+ caller. time_caches = 0.0
246259 end
260+ cycletop = frames[cycleid]:: InferenceState
261+ time_start = cycletop. time_start
247262 validation_world = get_world_counter ()
248263 cis = CodeInstance[]
249264 for frameid = cycleid: length (frames)
250265 caller = frames[frameid]:: InferenceState
251- finish! (caller. interp, caller, validation_world)
266+ caller. time_start = time_start
267+ caller. time_caches = time_caches
268+ caller. time_paused = time_paused
269+ finish! (caller. interp, caller, validation_world, time_before)
252270 if isdefined (caller. result, :ci )
253271 push! (cis, caller. result. ci)
254272 end
255273 end
274+ if cycletop. parentid != 0
275+ parent = frames[cycletop. parentid]
276+ parent. time_caches += time_caches
277+ parent. time_paused += time_paused
278+ end
256279 # After validation, under the world_counter_lock, set max_world to typemax(UInt) for all dependencies
257280 # (recursively). From that point onward the ordinary backedge mechanism is responsible for maintaining
258281 # validity.
@@ -792,9 +815,10 @@ function return_cached_result(interp::AbstractInterpreter, method::Method, codei
792815 rt = cached_return_type (codeinst)
793816 exct = codeinst. exctype
794817 effects = ipo_effects (codeinst)
795- edge = codeinst
796818 update_valid_age! (caller, WorldRange (min_world (codeinst), max_world (codeinst)))
797- return Future (MethodCallResult (interp, caller, method, rt, exct, effects, edge, edgecycle, edgelimited))
819+ caller. time_caches += reinterpret (Float16, codeinst. time_infer_total)
820+ caller. time_caches += reinterpret (Float16, codeinst. time_infer_cache_saved)
821+ return Future (MethodCallResult (interp, caller, method, rt, exct, effects, codeinst, edgecycle, edgelimited))
798822end
799823
800824function MethodCallResult (:: AbstractInterpreter , sv:: AbsIntState , method:: Method ,
@@ -890,7 +914,9 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
890914 if frame === false
891915 # completely new, but check again after reserving in the engine
892916 if cache_mode == CACHE_MODE_GLOBAL
917+ reserve_start = _time_ns () # subtract engine_reserve (thread-synchronization) time from callers to avoid double-counting
893918 ci_from_engine = engine_reserve (interp, mi)
919+ caller. time_paused += (_time_ns () - reserve_start)
894920 edge_ci = ci_from_engine
895921 codeinst = get (code_cache (interp), mi, nothing )
896922 if codeinst isa CodeInstance # return existing rettype if the code is already inferred
0 commit comments