Skip to content

Commit ca1cf8a

Browse files
committed
2 parents 93fef1c + 307d6b5 commit ca1cf8a

File tree

5 files changed

+60
-11
lines changed

5 files changed

+60
-11
lines changed

src/broadcast.jl

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,9 @@ the final operation.
6060
# TODO: Need to make this handle A or B being (1 or 2)-D broadcast objects.
6161
function add_broadcast!(
6262
ls::LoopSet, mC::Symbol, bcname::Symbol, loopsyms::Vector{Symbol},
63-
::Type{Product{A,B}}, elementbytes::Int
64-
) where {A, B}
63+
@nospecialize(prod::Type{<:Product}), elementbytes::Int
64+
)
65+
A, B = prod.parameters
6566
K = gensym(:K)
6667
mA = gensym(:Aₘₖ)
6768
mB = gensym(:Bₖₙ)
@@ -126,9 +127,10 @@ function LowDimArray{D}(data::A) where {D,T,N,A <: AbstractArray{T,N}}
126127
end
127128
function add_broadcast!(
128129
ls::LoopSet, destname::Symbol, bcname::Symbol, loopsyms::Vector{Symbol},
129-
::Type{<:LowDimArray{D,T,N}}, elementbytes::Int
130-
) where {D,T,N}
131-
fulldims = Symbol[loopsyms[n] for n ∈ 1:N if D[n]]
130+
@nospecialize(LDA::Type{<:LowDimArray}), elementbytes::Int
131+
)
132+
D,T,N::Int,_ = LDA.parameters
133+
fulldims = Symbol[loopsyms[n] for n ∈ 1:N if D[n]::Bool]
132134
ref = ArrayReference(bcname, fulldims)
133135
add_simple_load!(ls, destname, ref, elementbytes, true, false )::Operation
134136
end
@@ -185,11 +187,13 @@ function add_broadcast!(
185187
inds[2:end] .= @view(loopsyms[1:N])
186188
add_simple_load!(ls, destname, ArrayReference(bcname, inds), elementbytes, true, true)
187189
end
190+
BroadcastedArray{S<:Broadcast.AbstractArrayStyle,F,A} = Broadcasted{S,Nothing,F,A}
188191
function add_broadcast!(
189192
ls::LoopSet, destname::Symbol, bcname::Symbol, loopsyms::Vector{Symbol},
190-
::Type{Broadcasted{S,Nothing,F,A}},
193+
@nospecialize(B::Type{<:BroadcastedArray}),
191194
elementbytes::Int
192-
) where {N,S<:Base.Broadcast.AbstractArrayStyle{N},F,A}
195+
)
196+
S,_,F,A = B.parameters
193197
instr = get(FUNCTIONSYMBOLS, F) do
194198
f = gensym(:func)
195199
pushpreamble!(ls, Expr(:(=), f, Expr(:(.), bcname, QuoteNode(:f))))

src/constructors.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ function Base.copyto!(ls::LoopSet, q::Expr)
66
add_loop!(ls, q, 8)
77
end
88

9-
function add_ci_call!(q::Expr, f, args, syms, i, mod = nothing)
9+
function add_ci_call!(q::Expr, @nospecialize(f), args, syms, i, mod = nothing)
1010
call = if f isa Core.SSAValue
1111
Expr(:call, syms[f.id])
1212
else

src/precompile.jl

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,32 @@
11
function _precompile_()
22
ccall(:jl_generating_output, Cint, ()) == 1 || return nothing
3+
4+
precompile(Tuple{Type{LoopVectorization.LoopSet},Expr})
5+
precompile(Tuple{typeof(Base.mapreduce_impl),typeof(LoopVectorization.elsize),typeof(max),Array{LoopVectorization.Operation,1},Int64,Int64})
6+
precompile(Tuple{typeof(LoopVectorization._avx_loopset),Core.SimpleVector,Core.SimpleVector,Core.SimpleVector,Core.SimpleVector,Core.SimpleVector,Any})
7+
precompile(Tuple{typeof(LoopVectorization.add_broadcast!),LoopVectorization.LoopSet,Symbol,Symbol,Array{Symbol,1},Type{Array{Bool,1}},Int64})
8+
precompile(Tuple{typeof(LoopVectorization.add_ci_call!),Expr,Any,Array{Any,1},Array{Symbol,1},Int64,Symbol})
9+
precompile(Tuple{typeof(LoopVectorization.add_ci_call!),Expr,Any,Array{Any,1},Array{Symbol,1},Int64})
10+
precompile(Tuple{typeof(LoopVectorization.add_constant!),LoopVectorization.LoopSet,Float64,Array{Symbol,1},Symbol,Int64})
11+
precompile(Tuple{typeof(LoopVectorization.add_parent!),Array{LoopVectorization.Operation,1},Array{Symbol,1},Array{Symbol,1},LoopVectorization.LoopSet,Int64,Int64,Int64})
12+
precompile(Tuple{typeof(LoopVectorization.avx_body),LoopVectorization.LoopSet,Tuple{Int8,Int8}})
13+
precompile(Tuple{typeof(LoopVectorization.avx_loopset),Array{LoopVectorization.Instruction,1},Array{LoopVectorization.OperationStruct,1},Array{LoopVectorization.ArrayRefStruct,1},Core.SimpleVector,Core.SimpleVector,Core.SimpleVector,Any})
14+
precompile(Tuple{typeof(LoopVectorization.cost_vec_buf),LoopVectorization.LoopSet})
15+
precompile(Tuple{typeof(LoopVectorization.evaluate_cost_tile),LoopVectorization.LoopSet,Array{Symbol,1},Symbol,Symbol,Symbol})
16+
precompile(Tuple{typeof(LoopVectorization.evaluate_cost_unroll),LoopVectorization.LoopSet,Array{Symbol,1},Symbol,Float64})
17+
precompile(Tuple{typeof(LoopVectorization.lower_block),LoopVectorization.LoopSet,LoopVectorization.UnrollSpecification,Int64,Nothing,Int64})
18+
precompile(Tuple{typeof(LoopVectorization.lower_block),LoopVectorization.LoopSet,LoopVectorization.UnrollSpecification,Int64,Symbol,Int64})
19+
precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int64,Int64,Nothing,Bool})
20+
precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int64,Int64,Symbol,Bool})
21+
precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int64,Nothing,Nothing,Bool})
22+
precompile(Tuple{typeof(LoopVectorization.lower_compute!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int64,Nothing,Symbol,Bool})
23+
precompile(Tuple{typeof(LoopVectorization.lower_load!),Expr,LoopVectorization.Operation,Symbol,LoopVectorization.LoopSet,Symbol,Symbol,Int64,Int64,Nothing})
24+
precompile(Tuple{typeof(LoopVectorization.lower_load_scalar!),Expr,LoopVectorization.Operation,Symbol,Symbol,Symbol,Symbol,Int64,Nothing,Int64})
25+
precompile(Tuple{typeof(LoopVectorization.reg_pres_buf),LoopVectorization.LoopSet})
26+
precompile(Tuple{typeof(LoopVectorization.setup_call),LoopVectorization.LoopSet})
27+
precompile(Tuple{typeof(LoopVectorization.solve_tilesize),SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},Int64},true},SubArray{Float64,1,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},Int64},true},Int64,Int64})
28+
precompile(Tuple{typeof(LoopVectorization.substitute_broadcast),Expr,Symbol})
29+
precompile(Tuple{typeof(LoopVectorization.vmap_quote),Int64,Type{Float32}})
30+
precompile(Tuple{typeof(println),Base.GenericIOBuffer{Array{UInt8,1}},Array{LoopVectorization.Operation,1}})
31+
precompile(Tuple{typeof(resize!),LoopVectorization.LoopOrder,Int64})
332
end

src/reconstruct_loopset.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ function add_loops!(ls::LoopSet, LPSYM, LB)
4545
end
4646
end
4747
end
48-
function add_loops!(ls::LoopSet, i::Int, sym::Symbol, l::Type{CartesianIndices{N,T}}) where {N,T}
48+
function add_loops!(ls::LoopSet, i::Int, sym::Symbol, @nospecialize(l::Type{<:CartesianIndices}))
49+
N, T = l.parameters
4950
ssym = String(sym)
5051
for k = N:-1:1
5152
axisexpr = Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, Symbol(@__FILE__)), Expr(:ref, Expr(:., Expr(:ref, :lb, i), QuoteNode(:indices)), k))
@@ -375,7 +376,7 @@ function sizeofeltypes(v, num_arrays)::Int
375376
sizeof(T)
376377
end
377378

378-
function avx_loopset(instr, ops, arf, AM, LPSYM, LB, vargs)
379+
function avx_loopset(instr, ops, arf, AM, LPSYM, LB, @nospecialize(vargs))
379380
ls = LoopSet(:LoopVectorization)
380381
num_arrays = length(arf)
381382
elementbytes = sizeofeltypes(vargs, num_arrays)
@@ -407,7 +408,7 @@ function _avx_loopset_debug(::Type{OPS}, ::Type{ARF}, ::Type{AM}, ::Type{LPSYM},
407408
@show OPS ARF AM LPSYM LB vargs
408409
_avx_loopset(OPS.parameters, ARF.parameters, AM.parameters, LPSYM.parameters, LB.parameters, typeof.(vargs))
409410
end
410-
function _avx_loopset(OPSsv, ARFsv, AMsv, LPSYMsv, LBsv, vargs)
411+
function _avx_loopset(OPSsv, ARFsv, AMsv, LPSYMsv, LBsv, @nospecialize(vargs))
411412
nops = length(OPSsv) ÷ 3
412413
instr = Instruction[Instruction(OPSsv[3i+1], OPSsv[3i+2]) for i ∈ 0:nops-1]
413414
ops = OperationStruct[ OPSsv[3i] for i ∈ 1:nops ]

utils/generate_precompiles.jl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
using LoopVectorization, SnoopCompile
2+
pkgdir = dirname(@__DIR__)
3+
tinf = @snoopi tmin=0.01 include(joinpath(pkgdir, "test", "runtests.jl"))
4+
pc = SnoopCompile.parcel(tinf; blacklist=["vmaterialize", "vmaterialize!"])
5+
pcs = pc[:LoopVectorization]
6+
open(joinpath(pkgdir, "src", "precompile.jl"), "w") do io
7+
println(io, """
8+
function _precompile_()
9+
ccall(:jl_generating_output, Cint, ()) == 1 || return nothing
10+
""")
11+
for stmt in sort(pcs)
12+
println(io, " ", stmt)
13+
end
14+
println(io, "end")
15+
end

0 commit comments

Comments
 (0)