
Commit f7ce86c

Doc updates, rename some internal functions.

Parent: 35d0474

10 files changed: +50 −42 lines

docs/src/devdocs/constructing_loopsets.md

Lines changed: 2 additions & 2 deletions
@@ -15,11 +15,11 @@ quote
     var"##vptr##_A" = LoopVectorization.stridedpointer(A)
     var"##vptr##_B" = LoopVectorization.stridedpointer(B)
     begin
-        $(Expr(:gc_preserve, :(LoopVectorization._avx_!(Val{(0, 0)}(), Tuple{:numericconstant, Symbol("##zero#270"), LoopVectorization.OperationStruct(0x0000000000000012, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, LoopVectorization.constant, 0x00, 0x01), :LoopVectorization, :setindex!, LoopVectorization.OperationStruct(0x0000000000000012, 0x0000000000000000, 0x0000000000000000, 0x0000000000000007, LoopVectorization.memstore, 0x01, 0x02), :LoopVectorization, :getindex, LoopVectorization.OperationStruct(0x0000000000000013, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, LoopVectorization.memload, 0x02, 0x03), :LoopVectorization, :getindex, LoopVectorization.OperationStruct(0x0000000000000032, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, LoopVectorization.memload, 0x03, 0x04), :numericconstant, Symbol("##reductzero#274"), LoopVectorization.OperationStruct(0x0000000000000012, 0x0000000000000000, 0x0000000000000003, 0x0000000000000000, LoopVectorization.constant, 0x00, 0x05), :LoopVectorization, :vfmadd_fast, LoopVectorization.OperationStruct(0x0000000000000132, 0x0000000000000003, 0x0000000000000000, 0x0000000000030405, LoopVectorization.compute, 0x00, 0x05), :LoopVectorization, :reduce_to_add, LoopVectorization.OperationStruct(0x0000000000000012, 0x0000000000000003, 0x0000000000000000, 0x0000000000000601, LoopVectorization.compute, 0x00, 0x01)}, Tuple{LoopVectorization.ArrayRefStruct(0x0000000000000101, 0x0000000000000102, 0xffffffffffffe03b), LoopVectorization.ArrayRefStruct(0x0000000000000101, 0x0000000000000103, 0xffffffffffffffd6), LoopVectorization.ArrayRefStruct(0x0000000000000101, 0x0000000000000302, 0xffffffffffffe056), LoopVectorization.ArrayRefStruct(0x0000000000000101, 0x0000000000000102, 0xffffffffffffffd6)}, Tuple{0, Tuple{}, Tuple{}, Tuple{}, Tuple{}, Tuple{(1, LoopVectorization.IntOrFloat), (5, LoopVectorization.IntOrFloat)}, Tuple{}}, (LoopVectorization.StaticLowerUnitRange{0}(M), LoopVectorization.StaticLowerUnitRange{0}(N), LoopVectorization.StaticLowerUnitRange{0}(K)), var"##vptr##_C", var"##vptr##_A", var"##vptr##_B", var"##vptr##_C")), :C, :A, :B))
+        $(Expr(:gc_preserve, :(LoopVectorization._turbo_!(Val{(0, 0)}(), Tuple{:numericconstant, Symbol("##zero#270"), LoopVectorization.OperationStruct(0x0000000000000012, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, LoopVectorization.constant, 0x00, 0x01), :LoopVectorization, :setindex!, LoopVectorization.OperationStruct(0x0000000000000012, 0x0000000000000000, 0x0000000000000000, 0x0000000000000007, LoopVectorization.memstore, 0x01, 0x02), :LoopVectorization, :getindex, LoopVectorization.OperationStruct(0x0000000000000013, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, LoopVectorization.memload, 0x02, 0x03), :LoopVectorization, :getindex, LoopVectorization.OperationStruct(0x0000000000000032, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, LoopVectorization.memload, 0x03, 0x04), :numericconstant, Symbol("##reductzero#274"), LoopVectorization.OperationStruct(0x0000000000000012, 0x0000000000000000, 0x0000000000000003, 0x0000000000000000, LoopVectorization.constant, 0x00, 0x05), :LoopVectorization, :vfmadd_fast, LoopVectorization.OperationStruct(0x0000000000000132, 0x0000000000000003, 0x0000000000000000, 0x0000000000030405, LoopVectorization.compute, 0x00, 0x05), :LoopVectorization, :reduce_to_add, LoopVectorization.OperationStruct(0x0000000000000012, 0x0000000000000003, 0x0000000000000000, 0x0000000000000601, LoopVectorization.compute, 0x00, 0x01)}, Tuple{LoopVectorization.ArrayRefStruct(0x0000000000000101, 0x0000000000000102, 0xffffffffffffe03b), LoopVectorization.ArrayRefStruct(0x0000000000000101, 0x0000000000000103, 0xffffffffffffffd6), LoopVectorization.ArrayRefStruct(0x0000000000000101, 0x0000000000000302, 0xffffffffffffe056), LoopVectorization.ArrayRefStruct(0x0000000000000101, 0x0000000000000102, 0xffffffffffffffd6)}, Tuple{0, Tuple{}, Tuple{}, Tuple{}, Tuple{}, Tuple{(1, LoopVectorization.IntOrFloat), (5, LoopVectorization.IntOrFloat)}, Tuple{}}, (LoopVectorization.StaticLowerUnitRange{0}(M), LoopVectorization.StaticLowerUnitRange{0}(N), LoopVectorization.StaticLowerUnitRange{0}(K)), var"##vptr##_C", var"##vptr##_A", var"##vptr##_B", var"##vptr##_C")), :C, :A, :B))
     end
 end
 ```
-When the corresponding method gets compiled for specific type of `A`, `B`, and `C`, the call to the `@generated` function `_avx_!` get compiled. This causes the summary to be [reconstructed](https://github.com/JuliaSIMD/LoopVectorization.jl/blob/master/src/reconstruct_loopset.jl) using the available type information. This type information can be used, for example, to realize an array has been transposed, and thus correctly identify which axis contains contiguous elements that are efficient to load from. This kind of information cannot be extracted from the raw expression, which is why these decisions are made when the method gets compiled for specific types via the `@generated` function `_avx_!`.
+When the corresponding method gets compiled for specific types of `A`, `B`, and `C`, the call to the `@generated` function `_turbo_!` gets compiled. This causes the summary to be [reconstructed](https://github.com/JuliaSIMD/LoopVectorization.jl/blob/master/src/reconstruct_loopset.jl) using the available type information. This type information can be used, for example, to realize an array has been transposed, and thus correctly identify which axis contains contiguous elements that are efficient to load from. This kind of information cannot be extracted from the raw expression, which is why these decisions are made when the method gets compiled for specific types via the `@generated` function `_turbo_!`.

 The three chief components of the summaries are the definitions of operations, e.g.:
 ```julia
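As a hedged illustration of the point above (the values here are only examples, not part of this commit): the raw loop expression cannot distinguish a matrix from its transpose, but the argument types can.

```julia
using LinearAlgebra, LoopVectorization

A  = rand(100, 100)
At = transpose(A)  # Transpose{Float64, Matrix{Float64}}

# The source expression `X[m, k]` reads identically for A and At, but their
# stridedpointer types differ, encoding which axis holds contiguous memory.
# The @generated _turbo_! sees those types and can choose unit-stride loads.
typeof(stridedpointer(A)) == typeof(stridedpointer(At))  # false
```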

src/LoopVectorization.jl

Lines changed: 4 additions & 3 deletions
@@ -51,7 +51,7 @@ using Requires


 export LowDimArray, stridedpointer, indices,
-    @avx, @avxt, @turbo, @tturbo, *ˡ, _avx_!,
+    @avx, @avxt, @turbo, @tturbo, *ˡ, _turbo_!,
     vmap, vmap!, vmapt, vmapt!, vmapnt, vmapnt!, vmapntt, vmapntt!,
     tanh_fast, sigmoid_fast,
     vfilter, vfilter!, vmapreduce, vreduce
@@ -97,8 +97,9 @@ include("broadcast.jl")
 LoopVectorization provides macros and functions that combine SIMD vectorization and
 loop-reordering so as to improve performance:

-- [`@avx`](@ref): transform `for`-loops and broadcasting
-- [`@_avx`](@ref): similar to `@avx` but does not use type information
+- [`@turbo`](@ref): transform `for`-loops and broadcasting
+- [`vmapreduce`](@ref): vectorized version of `mapreduce`
+- [`vreduce`](@ref): vectorized version of `reduce`
 - [`vmap`](@ref) and `vmap!`: vectorized version of `map` and `map!`
 - [`vmapnt`](@ref) and `vmapnt!`: non-temporal variants of `vmap` and `vmap!`
 - [`vmapntt`](@ref) and `vmapntt!`: threaded variants of `vmapnt` and `vmapnt!`
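A hedged usage sketch of the exported API enumerated in that docstring (illustrative only, not part of the diff):

```julia
using LoopVectorization

x = rand(1000); y = rand(1000);

s = vmapreduce(abs2, +, x)  # vectorized mapreduce
t = vreduce(+, x)           # vectorized reduce
z = vmap(+, x, y)           # vectorized map

@turbo for i in eachindex(x)  # loop transform: SIMD plus loop reordering
    y[i] = 2 * x[i] + y[i]
end
```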

src/codegen/loopstartstopmanager.jl

Lines changed: 1 addition & 1 deletion
@@ -554,7 +554,7 @@ function use_loop_induct_var!(
     for (i,isli) ∈ enumerate(li)
         ii = i + offset
         ind = indices[ii]
-        Wisz && push!(gespinds.args, staticexpr(0)) # wrong for `@_avx`...
+        Wisz && push!(gespinds.args, staticexpr(0)) # wrong for `@_turbo`...
         if !li[i] # if it wasn't set
             uliv[i] = 0
             push!(offsetprecalc_descript.args, 0)

src/codegen/lower_threads.jl

Lines changed: 17 additions & 17 deletions
@@ -1,26 +1,26 @@
-struct AVX{UNROLL,OPS,ARF,AM,LPSYM,LBV,FLBV} <: Function end
+struct TURBO{UNROLL,OPS,ARF,AM,LPSYM,LBV,FLBV} <: Function end

-# This should call the same `_avx_!(Val{UNROLL}(), Val{OPS}(), Val{ARF}(), Val{AM}(), Val{LPSYM}(), _vargs)` as normal so that this
+# This should call the same `_turbo_!(Val{UNROLL}(), Val{OPS}(), Val{ARF}(), Val{AM}(), Val{LPSYM}(), _vargs)` as normal so that this
 # hopefully shouldn't add much to compile time.

-function (::AVX{UNROLL,OPS,ARF,AM,LPSYM,LBV,FLBV})(p::Ptr{UInt}) where {UNROLL,OPS,ARF,AM,LPSYM,K,LBV,FLBV<:Tuple{Vararg{Any,K}}}
+function (::TURBO{UNROLL,OPS,ARF,AM,LPSYM,LBV,FLBV})(p::Ptr{UInt}) where {UNROLL,OPS,ARF,AM,LPSYM,K,LBV,FLBV<:Tuple{Vararg{Any,K}}}
     (_, _vargs) = ThreadingUtilities.load(p, FLBV, 2*sizeof(UInt))
     # Main.VARGS[Threads.threadid()] = first(_vargs)
    # Threads.threadid() == 2 && Core.println(typeof(_vargs))
-    ret = _avx_!(Val{UNROLL}(), Val{OPS}(), Val{ARF}(), Val{AM}(), Val{LPSYM}(), Val{LBV}(), _vargs...)
+    ret = _turbo_!(Val{UNROLL}(), Val{OPS}(), Val{ARF}(), Val{AM}(), Val{LPSYM}(), Val{LBV}(), _vargs...)
     ThreadingUtilities.store!(p, ret, Int(register_size()))
     nothing
 end
-@generated function Base.pointer(::AVX{UNROLL,OPS,ARF,AM,LPSYM,LBV,FLBV}) where {UNROLL,OPS,ARF,AM,LPSYM,K,LBV,FLBV<:Tuple{Vararg{Any,K}}}
-    f = AVX{UNROLL,OPS,ARF,AM,LPSYM,LBV,FLBV}()
+@generated function Base.pointer(::TURBO{UNROLL,OPS,ARF,AM,LPSYM,LBV,FLBV}) where {UNROLL,OPS,ARF,AM,LPSYM,K,LBV,FLBV<:Tuple{Vararg{Any,K}}}
+    f = TURBO{UNROLL,OPS,ARF,AM,LPSYM,LBV,FLBV}()
     precompile(f, (Ptr{UInt},))
     quote
         $(Expr(:meta,:inline))
         @cfunction($f, Cvoid, (Ptr{UInt},))
     end
 end

-@inline function setup_avx_threads!(p::Ptr{UInt}, fptr::Ptr{Cvoid}, args::LBV) where {K,LBV<:Tuple{Vararg{Any,K}}}
+@inline function setup_turbo_threads!(p::Ptr{UInt}, fptr::Ptr{Cvoid}, args::LBV) where {K,LBV<:Tuple{Vararg{Any,K}}}
     offset = ThreadingUtilities.store!(p, fptr, sizeof(UInt))
     offset = ThreadingUtilities.store!(p, args, offset)
     nothing
@@ -32,7 +32,7 @@ struct StaticType{T} end
 @inline function avx_launch(
     ::Val{UNROLL}, ::Val{OPS}, ::Val{ARF}, ::Val{AM}, ::Val{LPSYM}, ::StaticType{LBV}, fargs::FARGS, tid
 ) where {UNROLL,OPS,ARF,AM,LPSYM,K,LBV<:Tuple{Vararg{Any,K}},FARGS}
-    ThreadingUtilities.launch(setup_avx_threads!, tid, pointer(AVX{UNROLL,OPS,ARF,AM,LPSYM,LBV,FARGS}()), fargs)
+    ThreadingUtilities.launch(setup_turbo_threads!, tid, pointer(TURBO{UNROLL,OPS,ARF,AM,LPSYM,LBV,FARGS}()), fargs)
 end

 # function approx_cbrt(x)
@@ -367,10 +367,10 @@ function thread_one_loops_expr(
         end
     end
     avxcall_args = Expr(:tuple, lastboundexpr, Symbol("#vargs#"))
-    _avx_call_ = :(_avx_!(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, Val(typeof(var"#avx#call#args#")), flatten_to_tuple(var"#avx#call#args#")...))
+    _turbo_call_ = :(_turbo_!(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, Val(typeof(var"#avx#call#args#")), flatten_to_tuple(var"#avx#call#args#")...))
     update_return_values = if length(ls.outer_reductions) > 0
         retv = loopset_return_value(ls, Val(false))
-        _avx_call_ = Expr(:(=), retv, _avx_call_)
+        _turbo_call_ = Expr(:(=), retv, _turbo_call_)
         outer_reduct_combine_expressions(ls, retv)
     else
         nothing
@@ -420,7 +420,7 @@ function thread_one_loops_expr(
             var"#threads#" = Polyester.UnsignedIteratorEarlyStop(var"#torelease#", 0x00000000)
         end
         var"#avx#call#args#" = $avxcall_args
-        $_avx_call_
+        $_turbo_call_
         var"##do#thread##" || $retexpr
         var"#thread#id#" = 0x00000000
         var"#thread#mask#" = Polyester.mask(var"#threads#")
@@ -516,12 +516,12 @@ function thread_two_loops_expr(
         end
     end
     avxcall_args = Expr(:tuple, lastboundexpr, Symbol("#vargs#"))
-    _avx_call_ = :(_avx_!(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, Val(typeof(var"#avx#call#args#")), flatten_to_tuple(var"#avx#call#args#")...))
-    # _avx_orig_ = :(_avx_!(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, var"#lv#tuple#args#"))
+    _turbo_call_ = :(_turbo_!(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, Val(typeof(var"#avx#call#args#")), flatten_to_tuple(var"#avx#call#args#")...))
+    # _turbo_orig_ = :(_turbo_!(Val{$UNROLL}(), $OPS, $ARF, $AM, $LPSYM, var"#lv#tuple#args#"))
     update_return_values = if length(ls.outer_reductions) > 0
         retv = loopset_return_value(ls, Val(false))
-        _avx_call_ = Expr(:(=), retv, _avx_call_)
-        # _avx_orig_ = Expr(:(=), retv, _avx_orig_)
+        _turbo_call_ = Expr(:(=), retv, _turbo_call_)
+        # _turbo_orig_ = Expr(:(=), retv, _turbo_orig_)
         outer_reduct_combine_expressions(ls, retv)
     else
         nothing
@@ -535,7 +535,7 @@ function thread_two_loops_expr(
         $loopstart1
         $loopstart2
         # if var"#nthreads#" ≤ 1
-        #     $_avx_orig_
+        #     $_turbo_orig_
         #     return $retexpr
         # end
         $define_len1
@@ -614,7 +614,7 @@ function thread_two_loops_expr(
         end
         # @show $lastboundexpr
         var"#avx#call#args#" = $avxcall_args
-        $_avx_call_
+        $_turbo_call_
         var"##do#thread##" || $retexpr
         # @show $retv
         var"#thread#id#" = 0x00000000

src/condense_loopset.jl

Lines changed: 3 additions & 3 deletions
@@ -527,7 +527,7 @@ end
 # _first_cache_size(::Nothing) = StaticInt(262144)
 # first_cache_size() = _first_cache_size(cache_size(first_cache()))

-@generated function _avx_config_val(
+@generated function _turbo_config_val(
     ::Val{CNFARG}, ::StaticInt{W}, ::StaticInt{RS}, ::StaticInt{AR}, ::StaticInt{NT},
     ::StaticInt{CLS}, ::StaticInt{L1}, ::StaticInt{L2}, ::StaticInt{L3}
 ) where {CNFARG,W,RS,AR,CLS,L1,L2,L3,NT}
@@ -539,7 +539,7 @@ end
 @inline function avx_config_val(
     ::Val{CNFARG}, ::StaticInt{W}
 ) where {CNFARG,W}
-    _avx_config_val(
+    _turbo_config_val(
         Val{CNFARG}(), StaticInt{W}(), register_size(), available_registers(), lv_max_num_threads(),
         cache_linesize(), cache_size(StaticInt(1)), cache_size(StaticInt(2)), cache_size(StaticInt(3))
     )
@@ -666,7 +666,7 @@ function generate_call_types(
     argmeta = argmeta_and_consts_description(ls, arraysymbolinds)
     loop_bounds = loop_boundaries(ls, shouldindbyind)
     loop_syms = tuple_expr(QuoteNode, ls.loopsymbols)
-    func = debug ? lv(:_avx_loopset_debug) : lv(:_avx_!)
+    func = debug ? lv(:_turbo_loopset_debug) : lv(:_turbo_!)
     lbarg = debug ? Expr(:call, :typeof, loop_bounds) : loop_bounds
     configarg = (inline,u₁,u₂,ls.isbroadcast,thread)
     unroll_param_tup = Expr(:call, lv(:avx_config_val), :(Val{$configarg}()), VECTORWIDTHSYMBOL)
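For context, `avx_config_val` queries hardware parameters (register size, cache line size, cache sizes) as `StaticInt`s, and the `@generated` `_turbo_config_val` folds them into a single `Val` at compile time. A hedged sketch of that folding idea, assuming `Static.jl`'s `StaticInt` (simplified, not the actual internals):

```julia
using Static: StaticInt  # assumption: Static.jl provides StaticInt

# RS and CLS are type parameters inside the @generated body, so they can be
# baked into the returned type with zero runtime cost.
@generated function config_val(::StaticInt{RS}, ::StaticInt{CLS}) where {RS,CLS}
    :(Val{$((RS, CLS))}())
end

config_val(StaticInt(64), StaticInt(64))  # Val{(64, 64)}()
```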

src/constructors.jl

Lines changed: 13 additions & 6 deletions
@@ -129,9 +129,9 @@ end
 Annotate a `for` loop, or a set of nested `for` loops whose bounds are constant across iterations, to optimize the computation. For example:

     function AmulB!(C, A, B)
-        @turbo for m ∈ 1:size(A,1), n ∈ 1:size(B,2)
+        @turbo for m ∈ indices((A,C), 1), n ∈ indices((B,C), 2) # indices((A,C),1) == axes(A,1) == axes(C,1)
             Cₘₙ = zero(eltype(C))
-            for k ∈ 1:size(A,2)
+            for k ∈ indices((A,B), (2,1)) # indices((A,B), (2,1)) == axes(A,2) == axes(B,1)
                 Cₘₙ += A[m,k] * B[k,n]
             end
             C[m,n] = Cₘₙ
@@ -163,7 +163,7 @@ true
 Advanced users can customize the implementation of the `@turbo`-annotated block
 using keyword arguments:

-```
+```julia
 @turbo inline=false unroll=2 body
 ```
@@ -175,9 +175,9 @@ It is clamped to be between `1` and `min(Threads.nthreads(),LoopVectorization.num_cores())`.
 `false` is equivalent to `1`, and `true` is equivalent to `min(Threads.nthreads(),LoopVectorization.num_cores())`.

 `inline` is a Boolean. When `true`, `body` will be directly inlined
-into the function (via a forced-inlining call to `_avx_!`).
-When `false`, it wont force inlining of the call to `_avx_!` instead, letting Julia's own inlining engine
-determine whether the call to `_avx_!` should be inlined. (Typically, it won't.)
+into the function (via a forced-inlining call to `_turbo_!`).
+When `false`, it won't force inlining of the call to `_turbo_!`, instead letting Julia's own inlining engine
+determine whether the call to `_turbo_!` should be inlined. (Typically, it won't.)
 Sometimes not inlining can lead to substantially worse code generation, and >40% regressions, even in very
 large problems (2-d convolutions are a case where this has been observed).
 One can find some circumstances where `inline=true` is faster, and other circumstances
@@ -208,6 +208,8 @@ macro turbo(args...)
     turbo_macro(__module__, __source__, last(args), Base.front(args)...)
 end
 """
+    @tturbo
+
 Equivalent to `@turbo`, except it adds `thread=true` as the first keyword argument.
 Note that later arguments take precedence.
@@ -249,6 +251,11 @@ macro _turbo(arg, q)
     esc(Expr(:block, ls.prepreamble, lower(ls, u₁ % Int, u₂ % Int, -1)))
 end

+"""
+    @turbo_debug
+
+Returns a `LoopSet` object instead of evaluating the loops. Useful for debugging and introspection.
+"""
 macro turbo_debug(q)
     q = macroexpand(__module__, q)
     ls = LoopSet(q, __module__)
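The docstring change in the `AmulB!` example above swaps `1:size(...)` bounds for `indices`, which returns a range shared by the paired axes (per the comments in the diff, `indices((A,C),1) == axes(A,1) == axes(C,1)`). A hedged usage sketch (illustrative values):

```julia
using LoopVectorization

A = rand(3, 4); B = rand(4, 5); C = rand(3, 5);

indices((A, C), 1)       # the shared range axes(A,1) == axes(C,1)
indices((A, B), (2, 1))  # the shared range axes(A,2) == axes(B,1)
```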

src/modeling/graphs.jl

Lines changed: 1 addition & 1 deletion
@@ -564,7 +564,7 @@ end

 """
 Used internally to create symbols unique for this loopset.
-This is used so that identical loops will create identical `_avx_!` calls in the macroexpansions, hopefully reducing recompilation.
+This is used so that identical loops will create identical `_turbo_!` calls in the macroexpansions, hopefully reducing recompilation.
 """
 gensym!(ls::LoopSet, s) = Symbol("###$(s)###$(ls.symcounter += 1)###")
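A hedged sketch of the deterministic-naming idea behind `gensym!` (a simplified stand-in, not the real `LoopSet`): a per-object counter, unlike `Base.gensym`, means two textually identical loops expand to byte-identical `_turbo_!` calls and can share a compiled specialization.

```julia
mutable struct SymCounter
    count::Int
end

# Deterministic: the n-th request for name `s` always yields the same symbol,
# whereas Base.gensym() produces a different name on every expansion.
gensym!(c::SymCounter, s) = Symbol("###", s, "###", c.count += 1, "###")

c = SymCounter(0)
gensym!(c, "zero")  # Symbol("###zero###1###")
gensym!(c, "zero")  # Symbol("###zero###2###")
```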

src/precompile.jl

Lines changed: 2 additions & 2 deletions
@@ -1,14 +1,14 @@
 function _precompile_()
     ccall(:jl_generating_output, Cint, ()) == 1 || return nothing
-    # Base.precompile(Tuple{typeof(which(_avx_!,(Val{UNROLL},Val{OPS},Val{ARF},Val{AM},Val{LPSYM},Tuple{LB, V},)).generator.gen),Any,Any,Any,Any,Any,Any,Any,Any,Type,Type,Type,Type,Any,Any}) # time: 1.0198073
+    # Base.precompile(Tuple{typeof(which(_turbo_!,(Val{UNROLL},Val{OPS},Val{ARF},Val{AM},Val{LPSYM},Tuple{LB, V},)).generator.gen),Any,Any,Any,Any,Any,Any,Any,Any,Type,Type,Type,Type,Any,Any}) # time: 1.0198073
     # Base.precompile(Tuple{typeof(gespf1),Any,Tuple{Any, VectorizationBase.NullStep}}) # time: 0.1096832
     Base.precompile(Tuple{typeof(turbo_macro),Module,LineNumberNode,Expr}) # time: 0.09183489
     Base.precompile(Tuple{typeof(gespf1),StridedPointer{Float64, 1, 1, 0, (1,), Tuple{StaticInt{8}}, Tuple{StaticInt{1}}},Tuple{StaticInt{1}}}) # time: 0.05469272
     Base.precompile(Tuple{typeof(zerorangestart),UnitRange{Int}}) # time: 0.04291692
     Base.precompile(Tuple{Type{LoopSet},Symbol}) # time: 0.03362425
     Base.precompile(Tuple{typeof(recursive_muladd_search!),Expr,Vector{Any},Nothing,Bool,Bool}) # time: 0.029960306
     Base.precompile(Tuple{typeof(add_constant!),LoopSet,Float64,Vector{Symbol},Symbol,Int}) # time: 0.027501073
-    Base.precompile(Tuple{typeof(_avx_loopset),Any,Any,Any,Any,Core.SimpleVector,Core.SimpleVector,Tuple{Bool, Int8, Int8, Bool, Int, Int, Int, Int, Int, Int, Int, UInt}}) # time: 0.02345788
+    Base.precompile(Tuple{typeof(_turbo_loopset),Any,Any,Any,Any,Core.SimpleVector,Core.SimpleVector,Tuple{Bool, Int8, Int8, Bool, Int, Int, Int, Int, Int, Int, Int, UInt}}) # time: 0.02345788
     Base.precompile(Tuple{typeof(substitute_broadcast),Expr,Symbol,Bool,Int8,Int8,Int}) # time: 0.02281322
     Base.precompile(Tuple{typeof(push!),LoopSet,Expr,Int,Int}) # time: 0.022659862
     Base.precompile(Tuple{typeof(add_compute!),LoopSet,Symbol,Expr,Int,Int,Nothing}) # time: 0.02167476
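For reference, the `jl_generating_output` guard at the top of `_precompile_()` means the `Base.precompile` calls only run while Julia is emitting a precompile image. A hedged minimal example of the same idiom:

```julia
function _precompile_example_()
    # jl_generating_output returns 1 only during package precompilation or
    # sysimage generation, so this body is a no-op at normal runtime.
    ccall(:jl_generating_output, Cint, ()) == 1 || return nothing
    Base.precompile(Tuple{typeof(sum), Vector{Float64}})
    nothing
end
```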
