optimizer: eliminate safe typeassert calls (JuliaLang#42706)

aviatesk · web-flow · commit 17e0bbaa6972 · 2021-10-21T14:52:25.000+09:00
Adds a very simple optimization pass to eliminate `typeassert` calls. The motivation is, when SROA replaces `getfield` calls with scalar values, then we can often prove `typeassert` whose first operand is a replaced value is no-op: ```julia julia> struct Foo; x; end julia> code_typed((Int,)) do a x1 = Foo(a) x2 = Foo(x1) typeassert(x2.x, Foo).x end |> only |> first CodeInfo( 1 ─ %1 = Main.Foo::Type{Foo} │ %2 = %new(%1, a)::Foo │ Main.typeassert(%2, Main.Foo)::Foo # can be nullified └── return a ) ``` Nullifying `typeassert` helps succeeding (simple) DCE to eliminate dead allocations, and also allows LLVM to do more aggressive DCE to emit simpler code. Here is a simple benchmarking: > sample target code: ```julia julia> function compute(T, n) r = 0 for i in 1:n x1 = T(i) x2 = T(x1) r += (x2.x::T).x::Int end r end compute (generic function with 1 method) julia> struct Foo; x; end julia> mutable struct Bar; x; end ``` > on master ```julia julia> @benchmark compute(Foo, 1000) BenchmarkTools.Trial: 10000 samples with 8 evaluations. Range (min … max): 3.263 μs … 145.828 μs ┊ GC (min … max): 0.00% … 97.14% Time (median): 3.516 μs ┊ GC (median): 0.00% Time (mean ± σ): 4.015 μs ± 3.726 μs ┊ GC (mean ± σ): 3.16% ± 3.46% ▇█▆▄▅▄▄▃▂▁▂▁ ▂ ▇███████████████▇██▇▇█▇▇▆▇▇▇▇▅▆▅▇▇▅██▇▇▆▇▇▇█▇█▇▇▅▆▆▆▆▅▅▅▅▄▄ █ 3.26 μs Histogram: log(frequency) by time 8.52 μs < Memory estimate: 7.64 KiB, allocs estimate: 489. julia> @benchmark compute(Bar, 1000) BenchmarkTools.Trial: 10000 samples with 4 evaluations. Range (min … max): 6.990 μs … 288.079 μs ┊ GC (min … max): 0.00% … 97.03% Time (median): 7.657 μs ┊ GC (median): 0.00% Time (mean ± σ): 9.019 μs ± 9.710 μs ┊ GC (mean ± σ): 4.59% ± 4.28% ▆█▆▄▃▂▂▁▂▃▂▁ ▁ ▁ ██████████████████████▇▇▇▇▇▆██████▇▇█▇▇▇▆▆▆▆▅▆▅▄▄▄▅▄▄▃▄▄▂▄▅ █ 6.99 μs Histogram: log(frequency) by time 20.7 μs < Memory estimate: 23.27 KiB, allocs estimate: 1489. ``` > on this branch ```julia julia> @benchmark compute(Foo, 1000) BenchmarkTools.Trial: 10000 samples with 1000 evaluations. 
Range (min … max): 1.234 ns … 116.188 ns ┊ GC (min … max): 0.00% … 0.00% Time (median): 1.246 ns ┊ GC (median): 0.00% Time (mean ± σ): 1.307 ns ± 1.444 ns ┊ GC (mean ± σ): 0.00% ± 0.00% █▇ ▂▂▁ ▂ ▁ ██████▇█▇▅▄▆▇▆▁▃▄▁▁▁▁▁▃▁▃▁▁▄▇▅▃▃▃▁▃▄▁▃▃▁▃▁▁▃▁▁▁▄▃▁▃▇███▇▇▇▆ █ 1.23 ns Histogram: log(frequency) by time 1.94 ns < Memory estimate: 0 bytes, allocs estimate: 0. julia> @benchmark compute(Bar, 1000) BenchmarkTools.Trial: 10000 samples with 1000 evaluations. Range (min … max): 1.234 ns … 33.790 ns ┊ GC (min … max): 0.00% … 0.00% Time (median): 1.245 ns ┊ GC (median): 0.00% Time (mean ± σ): 1.297 ns ± 0.677 ns ┊ GC (mean ± σ): 0.00% ± 0.00% █▇ ▃▂▁ ▁ ██████▆▆▅▁▄▅▅▄▁▄▄▄▃▄▃▁▃▁▃▄▃▁▃▁▃▁▁▁▃▃▁▃▃▁▁▁▁▁▁▁▃▁▄█████▇▇▇▇ █ 1.23 ns Histogram: log(frequency) by time 1.96 ns < Memory estimate: 0 bytes, allocs estimate: 0. ``` This `typeassert` elimination would be much more effective if we implement more aggressive SROA based on strong [alias analysis](https://github.com/aviatesk/EscapeAnalysis.jl) and/or [more aggressive Julia-level DCE](JuliaLang#27547). But this change is so simple that I don't think it hurts anything to have it for now.
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
@@ -317,31 +317,26 @@ function optimize(interp::AbstractInterpreter, opt::OptimizationState, params::O
 end
 
 function run_passes(ci::CodeInfo, sv::OptimizationState)
-    preserve_coverage = coverage_enabled(sv.mod)
-    ir = convert_to_ircode(ci, copy_exprargs(ci.code), preserve_coverage, sv)
-    ir = slot2reg(ir, ci, sv)
-    #@Base.show ("after_construct", ir)
+    @timeit "convert"   ir = convert_to_ircode(ci, sv)
+    @timeit "slot2reg"  ir = slot2reg(ir, ci, sv)
     # TODO: Domsorting can produce an updated domtree - no need to recompute here
     @timeit "compact 1" ir = compact!(ir)
-    @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
-    #@timeit "verify 2" verify_ir(ir)
-    ir = compact!(ir)
-    #@Base.show ("before_sroa", ir)
-    @timeit "SROA" ir = getfield_elim_pass!(ir)
-    #@Base.show ir.new_nodes
-    #@Base.show ("after_sroa", ir)
-    ir = adce_pass!(ir)
-    #@Base.show ("after_adce", ir)
+    @timeit "Inlining"  ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
+    # @timeit "verify 2" verify_ir(ir)
+    @timeit "compact 2" ir = compact!(ir)
+    @timeit "SROA"      ir = getfield_elim_pass!(ir)
+    @timeit "ADCE"      ir = adce_pass!(ir)
     @timeit "type lift" ir = type_lift_pass!(ir)
     @timeit "compact 3" ir = compact!(ir)
-    #@Base.show ir
     if JLOptions().debug_level == 2
         @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable))
     end
     return ir
 end
 
-function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv::OptimizationState)
+function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
+    code = copy_exprargs(ci.code)
+    coverage = coverage_enabled(sv.mod)
     # Go through and add an unreachable node after every
     # Union{} call. Then reindex labels.
     idx = 1
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
@@ -1467,3 +1467,8 @@ function iterate(x::BBIdxIter, (idx, bb)::Tuple{Int, Int}=(1, 1))
     end
     return (bb, idx), (idx + 1, next_bb)
 end
+
+is_known_call(e::Expr, @nospecialize(func), ir::IRCode) =
+    is_known_call(e, func, ir, ir.sptypes, ir.argtypes)
+
+argextype(@nospecialize(x), ir::IRCode) = argextype(x, ir, ir.sptypes, ir.argtypes)
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
@@ -449,10 +449,10 @@ function lift_comparison!(compact::IncrementalCompact, idx::Int,
     lifted_val = perform_lifting!(compact, visited_phinodes, cmp, lifting_cache, Bool, lifted_leaves, val)
     @assert lifted_val !== nothing
 
-    #global assertion_counter
-    #assertion_counter::Int += 1
-    #insert_node_here!(compact, Expr(:assert_egal, Symbol(string("assert_egal_", assertion_counter)), SSAValue(idx), lifted_val), nothing, 0, true)
-    #return
+    # global assertion_counter
+    # assertion_counter::Int += 1
+    # insert_node_here!(compact, Expr(:assert_egal, Symbol(string("assert_egal_", assertion_counter)), SSAValue(idx), lifted_val), nothing, 0, true)
+    # return
     compact[idx] = lifted_val.x
 end
 
@@ -734,17 +734,6 @@ function getfield_elim_pass!(ir::IRCode)
             result_t = make_MaybeUndef(result_t)
         end
 
-#        @Base.show result_t
-#        @Base.show stmt
-#        for (k,v) in lifted_leaves
-#            @Base.show (k, v)
-#            if isa(k, AnySSAValue)
-#                @Base.show compact[k]
-#            end
-#            if isa(v, RefValue) && isa(v.x, AnySSAValue)
-#                @Base.show compact[v.x]
-#            end
-#        end
         val = perform_lifting!(compact, visited_phinodes, field, lifting_cache, result_t, lifted_leaves, stmt.args[2])
 
         # Insert the undef check if necessary
@@ -761,8 +750,8 @@ function getfield_elim_pass!(ir::IRCode)
 
         # global assertion_counter
         # assertion_counter::Int += 1
-        #insert_node_here!(compact, Expr(:assert_egal, Symbol(string("assert_egal_", assertion_counter)), SSAValue(idx), val), nothing, 0, true)
-        #continue
+        # insert_node_here!(compact, Expr(:assert_egal, Symbol(string("assert_egal_", assertion_counter)), SSAValue(idx), val), nothing, 0, true)
+        # continue
         compact[idx] = val === nothing ? nothing : val.x
     end
 
@@ -894,7 +883,8 @@ function getfield_elim_pass!(ir::IRCode)
             ir[SSAValue(use)] = new_expr
         end
     end
-    ir
+
+    return ir
 end
 # assertion_counter = 0
 
@@ -935,7 +925,21 @@ end
 """
     adce_pass!(ir::IRCode) -> newir::IRCode
 
-Aggressive Dead Code Elimination pass to eliminate code.
+Aggressive Dead Code Elimination pass.
+
+In addition to a simple DCE for unused values and allocations,
+this pass also nullifies `typeassert` calls that can be proved to be no-op,
+in order to allow LLVM to emit simpler code down the road.
+
+Note that this pass is more effective after SROA optimization (i.e. `getfield_elim_pass!`),
+since SROA often allows this pass to:
+- eliminate allocations of objects whose field references are all replaced with scalar values, and
+- nullify `typeassert` calls whose first operand has been replaced with a scalar value
+  (which may have introduced new type information that inference did not understand)
+
+Also note that currently this pass _needs_ to run after `getfield_elim_pass!`, because
+the `typeassert` elimination depends on the transformation within `getfield_elim_pass!`
+which redirects references of `typeassert`ed value to the corresponding `PiNode`.
 """
 function adce_pass!(ir::IRCode)
     phi_uses = fill(0, length(ir.stmts) + length(ir.new_nodes))
@@ -944,6 +948,14 @@ function adce_pass!(ir::IRCode)
     for ((_, idx), stmt) in compact
         if isa(stmt, PhiNode)
             push!(all_phis, idx)
+        elseif isexpr(stmt, :call)
+            # nullify safe `typeassert` calls
+            if is_known_call(stmt, typeassert, compact) && length(stmt.args) == 3
+                ty, isexact = instanceof_tfunc(compact_exprtype(compact, stmt.args[3]))
+                if isexact && compact_exprtype(compact, stmt.args[2]) ⊑ ty
+                    compact[idx] = nothing
+                end
+            end
         end
     end
     non_dce_finish!(compact)
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
@@ -3422,8 +3422,7 @@ let
     ci.ssavaluetypes = Any[Any for i = 1:ci.ssavaluetypes]
     sv = Core.Compiler.OptimizationState(mi, Core.Compiler.OptimizationParams(),
         Core.Compiler.NativeInterpreter())
-    ir = Core.Compiler.convert_to_ircode(ci, Core.Compiler.copy_exprargs(ci.code),
-        false, sv)
+    ir = Core.Compiler.convert_to_ircode(ci, sv)
     ir = Core.Compiler.slot2reg(ir, ci, sv)
     ir = Core.Compiler.compact!(ir)
     Core.Compiler.replace_code_newstyle!(ci, ir, 4)
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
@@ -425,3 +425,32 @@ let # `getfield_elim_pass!` should work with constant globals
         return Meta.isexpr(stmt, :new)
     end
 end
+
+let # `typeassert` elimination in `adce_pass!`
+    src = @eval Module() begin
+        struct Foo; x; end
+
+        code_typed((Int,)) do a
+            x1 = Foo(a)
+            x2 = Foo(x1)
+            x3 = Foo(x2)
+
+            r1 = (x2.x::Foo).x
+            r2 = (x2.x::Foo).x::Int
+            r3 = (x2.x::Foo).x::Integer
+            r4 = ((x3.x::Foo).x::Foo).x
+
+            return r1, r2, r3, r4
+        end |> only |> first
+    end
+    # eliminate `typeassert(x2.x, Foo)`
+    @test all(src.code) do @nospecialize(stmt)
+        Meta.isexpr(stmt, :call) || return true
+        ft = Core.Compiler.argextype(stmt.args[1], src, Any[], src.slottypes)
+        return Core.Compiler.widenconst(ft) !== typeof(typeassert)
+    end
+    # the subsequent simple DCE will then eliminate the dead `Foo` allocations
+    @test all(src.code) do @nospecialize(stmt)
+        return !Meta.isexpr(stmt, :new)
+    end
+end