Skip to content

Commit a7b8c83

Browse files
authored
Revert #57979 (and following #58083 #58082) (#58182)
The point of #57979 was to make inference faster, but it instead made it much slower (83524ac#commitcomment-155658124), so revert to the fast behavior from before it was "optimized" (and also revert the bugfixes for the original commit).
1 parent 4766133 commit a7b8c83

File tree

6 files changed

+90
-154
lines changed

6 files changed

+90
-154
lines changed

Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ end
4747

4848
function Compiler.transform_result_for_cache(interp::SplitCacheInterp, result::Compiler.InferenceResult, edges::Compiler.SimpleVector)
4949
opt = result.src::Compiler.OptimizationState
50-
ir = opt.optresult.ir::Compiler.IRCode
50+
ir = opt.result.ir::Compiler.IRCode
5151
override = with_new_compiler
5252
for inst in ir.stmts
5353
stmt = inst[:stmt]

Compiler/src/optimize.jl

Lines changed: 64 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -116,14 +116,11 @@ function inline_cost_clamp(x::Int)
116116
return convert(InlineCostType, x)
117117
end
118118

119-
const SRC_FLAG_DECLARED_INLINE = 0x1
120-
const SRC_FLAG_DECLARED_NOINLINE = 0x2
121-
122119
is_declared_inline(@nospecialize src::MaybeCompressed) =
123-
ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_INLINE
120+
ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 1
124121

125122
is_declared_noinline(@nospecialize src::MaybeCompressed) =
126-
ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_NOINLINE
123+
ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 2
127124

128125
#####################
129126
# OptimizationState #
@@ -160,7 +157,6 @@ code_cache(state::InliningState) = WorldView(code_cache(state.interp), state.wor
160157

161158
mutable struct OptimizationResult
162159
ir::IRCode
163-
inline_flag::UInt8
164160
simplified::Bool # indicates whether the IR was processed with `cfg_simplify!`
165161
end
166162

@@ -172,7 +168,7 @@ end
172168
mutable struct OptimizationState{Interp<:AbstractInterpreter}
173169
linfo::MethodInstance
174170
src::CodeInfo
175-
optresult::Union{Nothing, OptimizationResult}
171+
result::Union{Nothing, OptimizationResult}
176172
stmt_info::Vector{CallInfo}
177173
mod::Module
178174
sptypes::Vector{VarState}
@@ -240,29 +236,13 @@ include("ssair/EscapeAnalysis.jl")
240236
include("ssair/passes.jl")
241237
include("ssair/irinterp.jl")
242238

243-
function ir_to_codeinf!(opt::OptimizationState, frame::InferenceState, edges::SimpleVector)
244-
ir_to_codeinf!(opt, edges, compute_inlining_cost(frame.interp, frame.result, opt.optresult))
245-
end
246-
247-
function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector, inlining_cost::InlineCostType)
248-
src = ir_to_codeinf!(opt, edges)
249-
src.inlining_cost = inlining_cost
250-
src
251-
end
252-
253-
function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector)
254-
src = ir_to_codeinf!(opt)
255-
src.edges = edges
256-
src
257-
end
258-
259239
function ir_to_codeinf!(opt::OptimizationState)
260-
(; linfo, src, optresult) = opt
261-
if optresult === nothing
240+
(; linfo, src, result) = opt
241+
if result === nothing
262242
return src
263243
end
264-
src = ir_to_codeinf!(src, optresult.ir)
265-
opt.optresult = nothing
244+
src = ir_to_codeinf!(src, result.ir)
245+
opt.result = nothing
266246
opt.src = src
267247
maybe_validate_code(linfo, src, "optimized")
268248
return src
@@ -505,12 +485,63 @@ end
505485
abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s]
506486

507487
"""
508-
finishopt!(interp::AbstractInterpreter, opt::OptimizationState, ir::IRCode)
488+
finish(interp::AbstractInterpreter, opt::OptimizationState,
489+
ir::IRCode, caller::InferenceResult)
509490
510-
Called at the end of optimization to store the resulting IR back into the OptimizationState.
491+
Post-process information derived by Julia-level optimizations for later use.
492+
In particular, this function determines the inlineability of the optimized code.
511493
"""
512-
function finishopt!(interp::AbstractInterpreter, opt::OptimizationState, ir::IRCode)
513-
opt.optresult = OptimizationResult(ir, ccall(:jl_ir_flag_inlining, UInt8, (Any,), opt.src), false)
494+
function finish(interp::AbstractInterpreter, opt::OptimizationState,
495+
ir::IRCode, caller::InferenceResult)
496+
(; src, linfo) = opt
497+
(; def, specTypes) = linfo
498+
499+
force_noinline = is_declared_noinline(src)
500+
501+
# compute inlining and other related optimizations
502+
result = caller.result
503+
@assert !(result isa LimitedAccuracy)
504+
result = widenslotwrapper(result)
505+
506+
opt.result = OptimizationResult(ir, false)
507+
508+
# determine and cache inlineability
509+
if !force_noinline
510+
sig = unwrap_unionall(specTypes)
511+
if !(isa(sig, DataType) && sig.name === Tuple.name)
512+
force_noinline = true
513+
end
514+
if !is_declared_inline(src) && result === Bottom
515+
force_noinline = true
516+
end
517+
end
518+
if force_noinline
519+
set_inlineable!(src, false)
520+
elseif isa(def, Method)
521+
if is_declared_inline(src) && isdispatchtuple(specTypes)
522+
# obey @inline declaration if a dispatch barrier would not help
523+
set_inlineable!(src, true)
524+
else
525+
# compute the cost (size) of inlining this code
526+
params = OptimizationParams(interp)
527+
cost_threshold = default = params.inline_cost_threshold
528+
if ⊑(optimizer_lattice(interp), result, Tuple) && !isconcretetype(widenconst(result))
529+
cost_threshold += params.inline_tupleret_bonus
530+
end
531+
# if the method is declared as `@inline`, increase the cost threshold 20x
532+
if is_declared_inline(src)
533+
cost_threshold += 19*default
534+
end
535+
# a few functions get special treatment
536+
if def.module === _topmod(def.module)
537+
name = def.name
538+
if name === :iterate || name === :unsafe_convert || name === :cconvert
539+
cost_threshold += 4*default
540+
end
541+
end
542+
src.inlining_cost = inline_cost(ir, params, cost_threshold)
543+
end
544+
end
514545
return nothing
515546
end
516547

@@ -984,8 +1015,7 @@ end
9841015
function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
9851016
@zone "CC: OPTIMIZER" ir = run_passes_ipo_safe(opt.src, opt)
9861017
ipo_dataflow_analysis!(interp, opt, ir, caller)
987-
finishopt!(interp, opt, ir)
988-
return nothing
1018+
return finish(interp, opt, ir, caller)
9891019
end
9901020

9911021
const ALL_PASS_NAMES = String[]
@@ -1436,7 +1466,7 @@ function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{Cod
14361466
return thiscost
14371467
end
14381468

1439-
function inline_cost_model(ir::IRCode, params::OptimizationParams, cost_threshold::Int)
1469+
function inline_cost(ir::IRCode, params::OptimizationParams, cost_threshold::Int)
14401470
bodycost = 0
14411471
for i = 1:length(ir.stmts)
14421472
stmt = ir[SSAValue(i)][:stmt]

Compiler/src/ssair/inlining.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -976,7 +976,7 @@ function retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode, preserve_local
976976
return ir, spec_info, DebugInfo(ir.debuginfo, length(ir.stmts))
977977
end
978978
function retrieve_ir_for_inlining(mi::MethodInstance, opt::OptimizationState, preserve_local_sources::Bool)
979-
result = opt.optresult
979+
result = opt.result
980980
if result !== nothing
981981
!result.simplified && simplify_ir!(result)
982982
return retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources)

Compiler/src/typeinfer.jl

Lines changed: 24 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,7 @@ end
104104
function finish!(interp::AbstractInterpreter, caller::InferenceState, validation_world::UInt, time_before::UInt64)
105105
result = caller.result
106106
#@assert last(result.valid_worlds) <= get_world_counter() || isempty(caller.edges)
107-
if caller.cache_mode === CACHE_MODE_LOCAL
108-
@assert !isdefined(result, :ci)
109-
result.src = transform_result_for_local_cache(interp, result)
110-
elseif isdefined(result, :ci)
107+
if isdefined(result, :ci)
111108
edges = result_edges(interp, caller)
112109
ci = result.ci
113110
# if we aren't cached, we don't need this edge
@@ -118,47 +115,40 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation
118115
store_backedges(ci, edges)
119116
end
120117
inferred_result = nothing
121-
uncompressed = result.src
118+
uncompressed = inferred_result
122119
const_flag = is_result_constabi_eligible(result)
123-
debuginfo = get_debuginfo(result.src)
124120
discard_src = caller.cache_mode === CACHE_MODE_NULL || const_flag
125121
if !discard_src
126122
inferred_result = transform_result_for_cache(interp, result, edges)
127-
if inferred_result !== nothing
128-
uncompressed = inferred_result
129-
debuginfo = get_debuginfo(inferred_result)
130-
# Inlining may fast-path the global cache via `VolatileInferenceResult`, so store it back here
131-
result.src = inferred_result
132-
end
133123
# TODO: do we want to augment edges here with any :invoke targets that we got from inlining (such that we didn't have a direct edge to it already)?
134124
if inferred_result isa CodeInfo
125+
result.src = inferred_result
135126
if may_compress(interp)
136127
nslots = length(inferred_result.slotflags)
137128
resize!(inferred_result.slottypes::Vector{Any}, nslots)
138129
resize!(inferred_result.slotnames, nslots)
139130
end
131+
di = inferred_result.debuginfo
132+
uncompressed = inferred_result
140133
inferred_result = maybe_compress_codeinfo(interp, result.linfo, inferred_result)
141134
result.is_src_volatile = false
142135
elseif ci.owner === nothing
143136
# The global cache can only handle objects that codegen understands
144137
inferred_result = nothing
145138
end
146139
end
147-
if debuginfo === nothing
148-
debuginfo = DebugInfo(result.linfo)
140+
if !@isdefined di
141+
di = DebugInfo(result.linfo)
149142
end
150143
time_now = _time_ns()
151144
time_self_ns = caller.time_self_ns + (time_now - time_before)
152145
time_total = (time_now - caller.time_start - caller.time_paused) * 1e-9
153146
ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
154147
ci, inferred_result, const_flag, first(result.valid_worlds), last(result.valid_worlds), encode_effects(result.ipo_effects),
155-
result.analysis_results, time_total, caller.time_caches, time_self_ns * 1e-9, debuginfo, edges)
148+
result.analysis_results, time_total, caller.time_caches, time_self_ns * 1e-9, di, edges)
156149
engine_reject(interp, ci)
157150
codegen = codegen_cache(interp)
158-
if !discard_src && codegen !== nothing && (isa(uncompressed, CodeInfo) || isa(uncompressed, OptimizationState))
159-
if isa(uncompressed, OptimizationState)
160-
uncompressed = ir_to_codeinf!(uncompressed, edges)
161-
end
151+
if !discard_src && codegen !== nothing && uncompressed isa CodeInfo
162152
# record that the caller could use this result to generate code when required, if desired, to avoid repeating n^2 work
163153
codegen[ci] = uncompressed
164154
if bootstrapping_compiler && inferred_result == nothing
@@ -309,116 +299,36 @@ function adjust_cycle_frame!(sv::InferenceState, cycle_valid_worlds::WorldRange,
309299
return nothing
310300
end
311301

312-
function get_debuginfo(src)
313-
isa(src, CodeInfo) && return src.debuginfo
314-
isa(src, OptimizationState) && return src.src.debuginfo
315-
return nothing
316-
end
317-
318302
function is_result_constabi_eligible(result::InferenceResult)
319303
result_type = result.result
320304
return isa(result_type, Const) && is_foldable_nothrow(result.ipo_effects) && is_inlineable_constant(result_type.val)
321305
end
322306

323-
function compute_inlining_cost(interp::AbstractInterpreter, result::InferenceResult)
324-
src = result.src
325-
isa(src, OptimizationState) || return MAX_INLINE_COST
326-
compute_inlining_cost(interp, result, src.optresult)
327-
end
328-
329-
function compute_inlining_cost(interp::AbstractInterpreter, result::InferenceResult, optresult#=::OptimizationResult=#)
330-
return inline_cost_model(interp, result, optresult.inline_flag, optresult.ir)
331-
end
332-
333-
function inline_cost_model(interp::AbstractInterpreter, result::InferenceResult,
334-
inline_flag::UInt8, ir::IRCode)
335-
336-
inline_flag === SRC_FLAG_DECLARED_NOINLINE && return MAX_INLINE_COST
337-
338-
mi = result.linfo
339-
(; def, specTypes) = mi
340-
if !isa(def, Method)
341-
return MAX_INLINE_COST
342-
end
343-
344-
declared_inline = inline_flag === SRC_FLAG_DECLARED_INLINE
345-
346-
rt = result.result
347-
@assert !(rt isa LimitedAccuracy)
348-
rt = widenslotwrapper(rt)
349-
350-
sig = unwrap_unionall(specTypes)
351-
if !(isa(sig, DataType) && sig.name === Tuple.name)
352-
return MAX_INLINE_COST
353-
end
354-
if !declared_inline && rt === Bottom
355-
return MAX_INLINE_COST
356-
end
357-
358-
if declared_inline && isdispatchtuple(specTypes)
359-
# obey @inline declaration if a dispatch barrier would not help
360-
return MIN_INLINE_COST
361-
else
362-
# compute the cost (size) of inlining this code
363-
params = OptimizationParams(interp)
364-
cost_threshold = default = params.inline_cost_threshold
365-
if ⊑(optimizer_lattice(interp), rt, Tuple) && !isconcretetype(widenconst(rt))
366-
cost_threshold += params.inline_tupleret_bonus
367-
end
368-
# if the method is declared as `@inline`, increase the cost threshold 20x
369-
if declared_inline
370-
cost_threshold += 19*default
371-
end
372-
# a few functions get special treatment
373-
if def.module === _topmod(def.module)
374-
name = def.name
375-
if name === :iterate || name === :unsafe_convert || name === :cconvert
376-
cost_threshold += 4*default
377-
end
378-
end
379-
return inline_cost_model(ir, params, cost_threshold)
380-
end
381-
end
382-
383-
function transform_result_for_local_cache(interp::AbstractInterpreter, result::InferenceResult)
384-
if is_result_constabi_eligible(result)
385-
return nothing
386-
end
307+
function transform_result_for_cache(::AbstractInterpreter, result::InferenceResult, edges::SimpleVector)
387308
src = result.src
388309
if isa(src, OptimizationState)
389-
# Compute and store any information required to determine the inlineability of the callee.
390-
opt = src
391-
opt.src.inlining_cost = compute_inlining_cost(interp, result)
392-
end
393-
return src
394-
end
395-
396-
function transform_result_for_cache(interp::AbstractInterpreter, result::InferenceResult, edges::SimpleVector)
397-
inlining_cost = nothing
398-
src = result.src
399-
if isa(src, OptimizationState)
400-
opt = src
401-
inlining_cost = compute_inlining_cost(interp, result, opt.optresult)
402-
discard_optimized_result(interp, opt, inlining_cost) && return nothing
403-
src = ir_to_codeinf!(opt)
310+
src = ir_to_codeinf!(src)
404311
end
405312
if isa(src, CodeInfo)
406313
src.edges = edges
407-
src.inlining_cost = inlining_cost !== nothing ? inlining_cost : compute_inlining_cost(interp, result)
408314
end
409315
return src
410316
end
411317

412-
function discard_optimized_result(interp::AbstractInterpreter, opt#=::OptimizationState=#, inlining_cost#=::InlineCostType=#)
413-
may_discard_trees(interp) || return false
414-
return inlining_cost == MAX_INLINE_COST
415-
end
416-
417318
function maybe_compress_codeinfo(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInfo)
418319
def = mi.def
419320
isa(def, Method) || return ci # don't compress toplevel code
420-
may_compress(interp) && return ccall(:jl_compress_ir, String, (Any, Any), def, ci)
421-
return ci
321+
can_discard_trees = may_discard_trees(interp)
322+
cache_the_tree = !can_discard_trees || is_inlineable(ci)
323+
if cache_the_tree
324+
if may_compress(interp)
325+
return ccall(:jl_compress_ir, String, (Any, Any), def, ci)
326+
else
327+
return ci
328+
end
329+
else
330+
return nothing
331+
end
422332
end
423333

424334
function cache_result!(interp::AbstractInterpreter, result::InferenceResult, ci::CodeInstance)
@@ -1193,7 +1103,8 @@ function typeinf_frame(interp::AbstractInterpreter, mi::MethodInstance, run_opti
11931103
else
11941104
opt = OptimizationState(frame, interp)
11951105
optimize(interp, opt, frame.result)
1196-
src = ir_to_codeinf!(opt, frame, Core.svec(opt.inlining.edges...))
1106+
src = ir_to_codeinf!(opt)
1107+
src.edges = Core.svec(opt.inlining.edges...)
11971108
end
11981109
result.src = frame.src = src
11991110
end

Compiler/test/codegen.jl

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
using Random
66
using InteractiveUtils
7-
using InteractiveUtils: code_llvm, code_native
87
using Libdl
98
using Test
109

0 commit comments

Comments
 (0)