Skip to content

Commit 19ad3be

Browse files
committed
optimizations: better modeling and codegen for apply calls
- Use svec instead of tuple for arguments (better match for ABI which will require boxes) - Directly forward single svec argument, both runtime and codegen, without copying. - Optimize all consistent builtin functions of constant arguments, not just ones with special tfuncs, reducing code duplication and divergence. - Codegen for `svec()` directly, so optimizer can see each store (and doesn't have to build the whole thing on the stack first).
1 parent aecb173 commit 19ad3be

File tree

7 files changed

+155
-33
lines changed

7 files changed

+155
-33
lines changed

Compiler/src/ssair/passes.jl

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -874,6 +874,49 @@ function perform_lifting!(compact::IncrementalCompact,
874874
return Pair{Any, PhiNest}(stmt_val, PhiNest(visited_philikes, lifted_philikes, lifted_leaves, reverse_mapping, walker_callback))
875875
end
876876

877+
# Rewrite the tuple-typed arguments of the call expression `stmt` (located at statement
# `idx` of `compact`) so that each one is replaced by a freshly inserted `Core.svec(...)` node.
# For every argument from index 4 onward whose inferred type is a `Tuple` DataType:
#   * if the argument is an SSAValue defined by a known `Core.tuple` call, the operands of
#     that call are reused directly;
#   * otherwise, if the tuple type has at least one parameter and its last parameter is not
#     a Vararg, one `getfield(arg, j)` node is inserted per element.
# Arguments that match neither case are left untouched.
# Mutates `stmt.args` in place and returns `nothing`. `𝕃ₒ` is not referenced in this body.
function lift_apply_args!(compact::IncrementalCompact, idx::Int, stmt::Expr, 𝕃ₒ::AbstractLattice)
878+
# Handle _apply_iterate calls: convert arguments to use `Core.svec`. The behavior of Core.svec (with boxing) better matches the ABI of codegen.
879+
# The statement is cleared here and reinstalled (with its rewritten args) at the end.
# NOTE(review): presumably this keeps `compact`'s bookkeeping consistent while `stmt.args` is mutated -- confirm.
compact[idx] = nothing
880+
for i in 4:length(stmt.args) # Start at index 4. NOTE(review): presumably args[1:3] are the `_apply_iterate` callee, the iterate function, and `f`, so every iterated argument (including the first) is visited -- confirm
881+
arg = stmt.args[i]
882+
arg_type = argextype(arg, compact)
883+
# Stays `nothing` unless one of the two conversion strategies below applies.
svec_args = nothing
884+
if isa(arg_type, DataType) && arg_type.name === Tuple.name
885+
if isa(arg, SSAValue)
886+
arg_stmt = compact[arg][:stmt]
887+
if is_known_call(arg_stmt, Core.tuple, compact)
888+
# Copy so the original tuple call is not mutated; slot 1 (the callee) is overwritten with `Core.svec` below.
svec_args = copy(arg_stmt.args)
889+
end
890+
end
891+
if svec_args === nothing
892+
# Fallback path: generate getfield calls for tuple elements
893+
tuple_length = length(arg_type.parameters)
894+
# Only handled when the tuple type is non-empty and does not end in a Vararg parameter.
if tuple_length > 0 && !isvarargtype(arg_type.parameters[tuple_length])
895+
# One extra leading slot is reserved for the callee, filled in below.
svec_args = Vector{Any}(undef, tuple_length + 1)
896+
for j in 1:tuple_length
897+
getfield_call = Expr(:call, GlobalRef(Core, :getfield), arg, j)
898+
getfield_type = arg_type.parameters[j]
899+
inst = compact[SSAValue(idx)]
900+
# The new node is inserted relative to statement `idx` and reuses that statement's line and flag.
getfield_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(getfield_call, getfield_type, NoCallInfo(), inst[:line], inst[:flag]))
901+
svec_args[j + 1] = getfield_ssa
902+
end
903+
end
904+
end
905+
end
906+
# Create Core.svec call if we have arguments
907+
if svec_args !== nothing
908+
svec_args[1] = GlobalRef(Core, :svec)
909+
new_svec_call = Expr(:call)
910+
new_svec_call.args = svec_args
911+
inst = compact[SSAValue(idx)]
912+
new_svec_ssa = insert_node!(compact, SSAValue(idx), NewInstruction(new_svec_call, SimpleVector, NoCallInfo(), inst[:line], inst[:flag]))
913+
# Point the original call at the new svec value instead of the tuple.
stmt.args[i] = new_svec_ssa
914+
end
915+
end
916+
# Reinstall the (possibly rewritten) statement that was cleared at the top.
compact[idx] = stmt
917+
nothing
918+
end
919+
877920
function lift_svec_ref!(compact::IncrementalCompact, idx::Int, stmt::Expr)
878921
length(stmt.args) != 3 && return
879922

@@ -1377,6 +1420,9 @@ function sroa_pass!(ir::IRCode, inlining::Union{Nothing,InliningState}=nothing)
13771420
compact[SSAValue(idx)] = (compact[enter_ssa][:stmt]::EnterNode).scope
13781421
elseif isexpr(stmt, :new)
13791422
refine_new_effects!(𝕃ₒ, compact, idx, stmt)
1423+
elseif is_known_call(stmt, Core._apply_iterate, compact)
1424+
length(stmt.args) >= 4 || continue
1425+
lift_apply_args!(compact, idx, stmt, 𝕃ₒ)
13801426
end
13811427
continue
13821428
end

Compiler/src/tfuncs.jl

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,15 @@ end
580580
add_tfunc(nfields, 1, 1, nfields_tfunc, 1)
581581
add_tfunc(Core._expr, 1, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->Expr), 100)
582582
add_tfunc(svec, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->SimpleVector), 20)
583+
584+
# Return-type function for `Core._svec_len` (it is registered for that builtin with `add_tfunc`).
# Given the lattice element `s` describing the argument, returns `Const(n)` when `s` is a
# `Const` wrapping a `SimpleVector` of length `n`, and the type `Int` otherwise.
# The lattice argument `𝕃` is not referenced in this body.
@nospecs function _svec_len_tfunc(𝕃::AbstractLattice, s)
585+
# A constant SimpleVector has a known length, so the result folds to a constant.
if isa(s, Const) && isa(s.val, SimpleVector)
586+
return Const(length(s.val))
587+
end
588+
# No constant information available: report only the result type.
return Int
589+
end
590+
add_tfunc(Core._svec_len, 1, 1, _svec_len_tfunc, 1)
591+
583592
@nospecs function _svec_ref_tfunc(𝕃::AbstractLattice, s, i)
584593
if isa(s, Const) && isa(i, Const)
585594
s, i = s.val, i.val
@@ -1960,15 +1969,8 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any})
19601969
# UnionAll context is missing around this.
19611970
pop!(argtypes)
19621971
end
1963-
all_are_const = true
1964-
for i in 1:length(argtypes)
1965-
if !isa(argtypes[i], Const)
1966-
all_are_const = false
1967-
break
1968-
end
1969-
end
1970-
if all_are_const
1971-
return Const(ntuple(i::Int->argtypes[i].val, length(argtypes)))
1972+
if is_all_const_arg(argtypes, 1) # repeated from builtin_tfunction for the benefit of callers that use this tfunc directly
1973+
return Const(tuple(collect_const_args(argtypes, 1)...))
19721974
end
19731975
params = Vector{Any}(undef, length(argtypes))
19741976
anyinfo = false
@@ -2334,14 +2336,17 @@ function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argt
23342336
elseif f === Core.compilerbarrier
23352337
na == 2 || return false
23362338
return compilerbarrier_nothrow(argtypes[1], nothing)
2339+
elseif f === Core._svec_len
2340+
na == 1 || return false
2341+
return _svec_len_tfunc(𝕃, argtypes[1]) isa Const
23372342
elseif f === Core._svec_ref
23382343
na == 2 || return false
23392344
return _svec_ref_tfunc(𝕃, argtypes[1], argtypes[2]) isa Const
23402345
end
23412346
return false
23422347
end
23432348

2344-
# known to be always effect-free (in particular nothrow)
2349+
# known to be always effect-free (in particular also nothrow)
23452350
const _PURE_BUILTINS = Any[
23462351
tuple,
23472352
svec,
@@ -2370,6 +2375,8 @@ const _CONSISTENT_BUILTINS = Any[
23702375
donotdelete,
23712376
memoryrefnew,
23722377
memoryrefoffset,
2378+
Core._svec_len,
2379+
Core._svec_ref,
23732380
]
23742381

23752382
# known to be effect-free (but not necessarily nothrow)
@@ -2394,6 +2401,7 @@ const _EFFECT_FREE_BUILTINS = [
23942401
Core.throw_methoderror,
23952402
getglobal,
23962403
compilerbarrier,
2404+
Core._svec_len,
23972405
Core._svec_ref,
23982406
]
23992407

@@ -2428,6 +2436,7 @@ const _ARGMEM_BUILTINS = Any[
24282436
replacefield!,
24292437
setfield!,
24302438
swapfield!,
2439+
Core._svec_len,
24312440
Core._svec_ref,
24322441
]
24332442

@@ -2571,6 +2580,7 @@ const _EFFECTS_KNOWN_BUILTINS = Any[
25712580
# Core._primitivetype,
25722581
# Core._setsuper!,
25732582
# Core._structtype,
2583+
Core._svec_len,
25742584
Core._svec_ref,
25752585
# Core._typebody!,
25762586
Core._typevar,
@@ -2675,7 +2685,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty
26752685
else
26762686
if contains_is(_CONSISTENT_BUILTINS, f)
26772687
consistent = ALWAYS_TRUE
2678-
elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned || f === Core._svec_ref
2688+
elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned || f === Core._svec_len || f === Core._svec_ref
26792689
consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
26802690
elseif f === Core._typevar || f === Core.memorynew
26812691
consistent = CONSISTENT_IF_NOTRETURNED
@@ -2784,11 +2794,12 @@ end
27842794
function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtypes::Vector{Any},
27852795
sv::Union{AbsIntState, Nothing})
27862796
𝕃ᵢ = typeinf_lattice(interp)
2787-
if isa(f, IntrinsicFunction)
2788-
if is_pure_intrinsic_infer(f) && all(@nospecialize(a) -> isa(a, Const), argtypes)
2789-
argvals = anymap(@nospecialize(a) -> (a::Const).val, argtypes)
2797+
# Early constant evaluation for foldable builtins with all const args
2798+
if isa(f, IntrinsicFunction) ? is_pure_intrinsic_infer(f) : (f in _PURE_BUILTINS || (f in _CONSISTENT_BUILTINS && f in _EFFECT_FREE_BUILTINS))
2799+
if is_all_const_arg(argtypes, 1)
2800+
argvals = collect_const_args(argtypes, 1)
27902801
try
2791-
# unroll a few cases which have specialized codegen
2802+
# unroll a few common cases for better codegen
27922803
if length(argvals) == 1
27932804
return Const(f(argvals[1]))
27942805
elseif length(argvals) == 2
@@ -2802,6 +2813,8 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
28022813
return Bottom
28032814
end
28042815
end
2816+
end
2817+
if isa(f, IntrinsicFunction)
28052818
iidx = Int(reinterpret(Int32, f)) + 1
28062819
if iidx < 0 || iidx > length(T_IFUNC)
28072820
# unknown intrinsic
@@ -2828,6 +2841,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
28282841
end
28292842
tf = T_FFUNC_VAL[fidx]
28302843
end
2844+
28312845
if hasvarargtype(argtypes)
28322846
if length(argtypes) - 1 > tf[2]
28332847
# definitely too many arguments

Compiler/test/effects.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1474,7 +1474,7 @@ end
14741474
let effects = Base.infer_effects((Core.SimpleVector,Int); optimize=false) do svec, i
14751475
Core._svec_ref(svec, i)
14761476
end
1477-
@test !Compiler.is_consistent(effects)
1477+
@test Compiler.is_consistent(effects)
14781478
@test Compiler.is_effect_free(effects)
14791479
@test !Compiler.is_nothrow(effects)
14801480
@test Compiler.is_terminates(effects)

base/essentials.jl

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -979,11 +979,7 @@ setindex!(A::MemoryRef{Any}, @nospecialize(x)) = (memoryrefset!(A, x, :not_atomi
979979

980980
getindex(v::SimpleVector, i::Int) = (@_foldable_meta; Core._svec_ref(v, i))
981981
function length(v::SimpleVector)
982-
@_total_meta
983-
t = @_gc_preserve_begin v
984-
len = unsafe_load(Ptr{Int}(pointer_from_objref(v)))
985-
@_gc_preserve_end t
986-
return len
982+
Core._svec_len(v)
987983
end
988984
firstindex(v::SimpleVector) = 1
989985
lastindex(v::SimpleVector) = length(v)

src/builtin_proto.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ extern "C" {
2020
XX(_primitivetype,"_primitivetype") \
2121
XX(_setsuper,"_setsuper!") \
2222
XX(_structtype,"_structtype") \
23+
XX(_svec_len,"_svec_len") \
2324
XX(_svec_ref,"_svec_ref") \
2425
XX(_typebody,"_typebody!") \
2526
XX(_typevar,"_typevar") \

src/builtins.c

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -701,9 +701,15 @@ JL_CALLABLE(jl_f__apply_iterate)
701701
return (jl_value_t*)t;
702702
}
703703
}
704-
else if (f == BUILTIN(tuple) && jl_is_tuple(args[1])) {
705-
return args[1];
704+
else if (f == BUILTIN(tuple)) {
705+
if (jl_is_tuple(args[1]))
706+
return args[1];
707+
if (jl_is_svec(args[1]))
708+
return jl_f_tuple(NULL, jl_svec_data(args[1]), jl_svec_len(args[1]));
706709
}
710+
// optimization for `f(svec...)`
711+
if (jl_is_svec(args[1]))
712+
return jl_apply_generic(f, jl_svec_data(args[1]), jl_svec_len(args[1]));
707713
}
708714
// estimate how many real arguments we appear to have
709715
size_t precount = 1;
@@ -2151,6 +2157,14 @@ JL_CALLABLE(jl_f__compute_sparams)
21512157
return (jl_value_t*)env;
21522158
}
21532159

2160+
// Builtin implementation of `_svec_len`: returns the length of its single
// SimpleVector argument as a boxed integer (via jl_box_long).
JL_CALLABLE(jl_f__svec_len)
2161+
{
2162+
// NOTE(review): presumably enforces exactly one argument (min 1, max 1) -- confirm against the JL_NARGS macro.
JL_NARGS(_svec_len, 1, 1);
2163+
// Only a pointer cast here; `s` is not read until after the type check below.
jl_svec_t *s = (jl_svec_t*)args[0];
2164+
// NOTE(review): presumably raises a type error unless the argument is a simplevector -- confirm against JL_TYPECHK.
JL_TYPECHK(_svec_len, simplevector, (jl_value_t*)s);
2165+
return jl_box_long(jl_svec_len(s));
2166+
}
2167+
21542168
JL_CALLABLE(jl_f__svec_ref)
21552169
{
21562170
JL_NARGS(_svec_ref, 2, 2);

src/codegen.cpp

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4065,21 +4065,38 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
40654065
}
40664066
}
40674067

4068-
else if ((f == BUILTIN(_apply_iterate) && nargs == 3) && ctx.vaSlot > 0) {
4068+
else if (f == BUILTIN(_apply_iterate) && nargs == 3) {
40694069
// turn Core._apply_iterate(iter, f, Tuple) ==> f(Tuple...) using the jlcall calling convention if Tuple is the va allocation
4070-
if (LoadInst *load = dyn_cast_or_null<LoadInst>(argv[3].V)) {
4071-
if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) {
4072-
Value *theF = boxed(ctx, argv[2]);
4073-
Value *nva = emit_n_varargs(ctx);
4070+
if (ctx.vaSlot > 0) {
4071+
if (LoadInst *load = dyn_cast_or_null<LoadInst>(argv[3].V)) {
4072+
if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) {
4073+
Value *theF = boxed(ctx, argv[2]);
4074+
Value *nva = emit_n_varargs(ctx);
40744075
#ifdef _P64
4075-
nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext()));
4076+
nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext()));
40764077
#endif
4077-
Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*));
4078-
Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva });
4079-
*ret = mark_julia_type(ctx, r, true, jl_any_type);
4080-
return true;
4078+
Value *theArgs = emit_ptrgep(ctx, ctx.argArray, ctx.nReqArgs * sizeof(jl_value_t*));
4079+
Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva });
4080+
*ret = mark_julia_type(ctx, r, true, jl_any_type);
4081+
return true;
4082+
}
40814083
}
40824084
}
4085+
// optimization for _apply_iterate when there is one argument and it is a SimpleVector
4086+
const jl_cgval_t &arg = argv[3];
4087+
if (arg.typ == (jl_value_t*)jl_simplevector_type) {
4088+
Value *theF = boxed(ctx, argv[2]);
4089+
Value *svec_val = boxed(ctx, arg);
4090+
Value *svec_len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, decay_derived(ctx, svec_val), Align(ctx.types().sizeof_ptr));
4091+
#ifdef _P64
4092+
svec_len = ctx.builder.CreateTrunc(svec_len, getInt32Ty(ctx.builder.getContext()));
4093+
#endif
4094+
Value *svec_data = emit_ptrgep(ctx, emit_pointer_from_objref(ctx, svec_val), ctx.types().sizeof_ptr);
4095+
OperandBundleDef OpBundle("jl_roots", svec_val);
4096+
Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, svec_data, svec_len }, OpBundle);
4097+
*ret = mark_julia_type(ctx, r, true, jl_any_type);
4098+
return true;
4099+
}
40834100
}
40844101

40854102
else if (f == BUILTIN(tuple)) {
@@ -4093,6 +4110,26 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
40934110
}
40944111
}
40954112

4113+
else if (f == BUILTIN(svec)) {
4114+
if (nargs == 0) {
4115+
*ret = mark_julia_const(ctx, (jl_value_t*)jl_emptysvec);
4116+
return true;
4117+
}
4118+
Value *svec = emit_allocobj(ctx, ctx.types().sizeof_ptr * (nargs + 1), ctx.builder.CreateIntToPtr(emit_tagfrom(ctx, jl_simplevector_type), ctx.types().T_pjlvalue), true, julia_alignment((jl_value_t*)jl_simplevector_type));
4119+
Value *svec_derived = decay_derived(ctx, svec);
4120+
ctx.builder.CreateAlignedStore(ConstantInt::get(ctx.types().T_size, nargs), svec_derived, Align(ctx.types().sizeof_ptr));
4121+
Value *svec_data = emit_ptrgep(ctx, svec_derived, ctx.types().sizeof_ptr);
4122+
ctx.builder.CreateMemSet(svec_data, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ctx.types().sizeof_ptr * nargs, Align(ctx.types().sizeof_ptr));
4123+
for (size_t i = 0; i < nargs; i++) {
4124+
Value *elem = boxed(ctx, argv[i + 1]);
4125+
Value *elem_ptr = emit_ptrgep(ctx, svec_derived, ctx.types().sizeof_ptr * (i + 1));
4126+
ctx.builder.CreateAlignedStore(elem, elem_ptr, Align(ctx.types().sizeof_ptr));
4127+
emit_write_barrier(ctx, svec, elem);
4128+
}
4129+
*ret = mark_julia_type(ctx, svec, true, jl_simplevector_type);
4130+
return true;
4131+
}
4132+
40964133
else if (f == BUILTIN(throw) && nargs == 1) {
40974134
Value *arg1 = boxed(ctx, argv[1]);
40984135
raise_exception(ctx, arg1);
@@ -4601,6 +4638,20 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
46014638
return emit_f_opfield(ctx, ret, f, argv, nargs, nullptr);
46024639
}
46034640

4641+
else if (f == BUILTIN(_svec_len) && nargs == 1) {
4642+
const jl_cgval_t &obj = argv[1];
4643+
Value *len;
4644+
if (obj.constant && jl_is_svec(obj.constant)) {
4645+
len = ConstantInt::get(ctx.types().T_size, jl_svec_len(obj.constant));
4646+
}
4647+
else {
4648+
Value *svec_val = decay_derived(ctx, boxed(ctx, obj));
4649+
len = ctx.builder.CreateAlignedLoad(ctx.types().T_size, svec_val, Align(ctx.types().sizeof_ptr));
4650+
}
4651+
*ret = mark_julia_type(ctx, len, false, jl_long_type);
4652+
return true;
4653+
}
4654+
46044655
else if (f == BUILTIN(nfields) && nargs == 1) {
46054656
const jl_cgval_t &obj = argv[1];
46064657
if (ctx.vaSlot > 0) {

0 commit comments

Comments
 (0)