Skip to content

Commit 45cbcdc

Browse files
committed
Updated to track removal of vectorizable from VectorizationBase. Also added some functions useful for defining derivatives.
1 parent 66bef0b commit 45cbcdc

File tree

5 files changed

+36
-10
lines changed

5 files changed

+36
-10
lines changed

Manifest.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
4949

5050
[[SIMDPirates]]
5151
deps = ["VectorizationBase"]
52-
git-tree-sha1 = "6d93eddeaf847073dfa36ad339d76015c59a9adb"
52+
git-tree-sha1 = "f62bec2edf3dc415ac06547e8c9ef07b55d46c0a"
5353
uuid = "21efa798-c60a-11e8-04d3-e1a92915a26a"
54-
version = "0.3.14"
54+
version = "0.3.15"
5555

5656
[[SLEEFPirates]]
5757
deps = ["Libdl", "SIMDPirates", "VectorizationBase"]
@@ -71,6 +71,6 @@ uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
7171

7272
[[VectorizationBase]]
7373
deps = ["CpuId", "LinearAlgebra"]
74-
git-tree-sha1 = "e1093ff0fc183880a6f836026309ba06672c92ec"
74+
git-tree-sha1 = "2a377190de71d8d3c7a65da8c6283e1d2c7f0507"
7575
uuid = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
76-
version = "0.3.1"
76+
version = "0.4.0"

src/LoopVectorization.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, extract_data, num_vector
77
Static, StaticUnitRange, StaticLowerUnitRange, StaticUpperUnitRange,
88
PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct
99
using SIMDPirates: VECTOR_SYMBOLS, evadd, evmul, vrange, reduced_add, reduced_prod, reduce_to_add, reduce_to_prod,
10-
sizeequivalentfloat, sizeequivalentint
11-
# vmullog2, vmullog10, vdivlog2, vdivlog2add, vdivlog10, vdivlog10add, vfmaddaddone
10+
sizeequivalentfloat, sizeequivalentint, vadd!, vsub!, vfmadd!, vfnmadd!,
11+
vmullog2, vmullog10, vdivlog2, vdivlog10, vmullog2add!, vmullog10add!, vdivlog2add!, vdivlog10add!, vfmaddaddone
1212
using Base.Broadcast: Broadcasted, DefaultArrayStyle
1313
using LinearAlgebra: Adjoint, Transpose
1414

src/costs.jl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ const COST = Dict{Instruction,InstructionCost}(
108108
Instruction(:(/)) => InstructionCost(13,4.0,-2.0),
109109
Instruction(:vadd) => InstructionCost(4,0.5),
110110
Instruction(:vsub) => InstructionCost(4,0.5),
111+
Instruction(:vadd!) => InstructionCost(4,0.5),
112+
Instruction(:vsub!) => InstructionCost(4,0.5),
111113
Instruction(:vmul) => InstructionCost(4,0.5),
112114
Instruction(:vfdiv) => InstructionCost(13,4.0,-2.0),
113115
Instruction(:evadd) => InstructionCost(4,0.5),
@@ -148,10 +150,21 @@ const COST = Dict{Instruction,InstructionCost}(
148150
Instruction(:vfmsub) => InstructionCost(4,0.5), # - and * will fuse into this, so much of the time they're not twice as expensive
149151
Instruction(:vfnmadd) => InstructionCost(4,0.5), # + and -* will fuse into this, so much of the time they're not twice as expensive
150152
Instruction(:vfnmsub) => InstructionCost(4,0.5), # - and -* will fuse into this, so much of the time they're not twice as expensive
153+
Instruction(:vfmadd!) => InstructionCost(4,0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
154+
Instruction(:vfnmadd!) => InstructionCost(4,0.5), # + and -* will fuse into this, so much of the time they're not twice as expensive
151155
Instruction(:vfmadd_fast) => InstructionCost(4,0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
152156
Instruction(:vfmsub_fast) => InstructionCost(4,0.5), # - and * will fuse into this, so much of the time they're not twice as expensive
153157
Instruction(:vfnmadd_fast) => InstructionCost(4,0.5), # + and -* will fuse into this, so much of the time they're not twice as expensive
154158
Instruction(:vfnmsub_fast) => InstructionCost(4,0.5), # - and -* will fuse into this, so much of the time they're not twice as expensive
159+
Instruction(:vfmaddaddone) => InstructionCost(4,0.5), # NOTE(review): costed the same as the fused multiply-add entries above; the earlier comment here was copied from the vfnmsub_fast entry -- confirm against SIMDPirates
160+
Instruction(:vmullog2) => InstructionCost(4,0.5),
161+
Instruction(:vmullog2add!) => InstructionCost(4,0.5),
162+
Instruction(:vmullog10) => InstructionCost(4,0.5),
163+
Instruction(:vmullog10add!) => InstructionCost(4,0.5),
164+
Instruction(:vdivlog2) => InstructionCost(13,4.0,-2.0),
165+
Instruction(:vdivlog2add!) =>InstructionCost(13,4.0,-2.0),
166+
Instruction(:vdivlog10) => InstructionCost(13,4.0,-2.0),
167+
Instruction(:vdivlog10add!) =>InstructionCost(13,4.0,-2.0),
155168
Instruction(:sqrt) => InstructionCost(15,4.0,-2.0),
156169
Instruction(:sqrt_fast) => InstructionCost(15,4.0,-2.0),
157170
Instruction(:log) => InstructionCost(20,20.0,40.0,20),
@@ -213,6 +226,8 @@ const REDUCTION_CLASS = Dict{Symbol,Float64}(
213226
:vfmsub => ADDITIVE_IN_REDUCTIONS,
214227
:vfnmadd => ADDITIVE_IN_REDUCTIONS,
215228
:vfnmsub => ADDITIVE_IN_REDUCTIONS,
229+
:vfmadd! => ADDITIVE_IN_REDUCTIONS,
230+
:vfnmadd! => ADDITIVE_IN_REDUCTIONS,
216231
:vfmadd_fast => ADDITIVE_IN_REDUCTIONS,
217232
:vfmsub_fast => ADDITIVE_IN_REDUCTIONS,
218233
:vfnmadd_fast => ADDITIVE_IN_REDUCTIONS,
@@ -260,9 +275,11 @@ isreductcombineinstr(instr::Instruction) = isreductcombineinstr(instr.instr)
260275
const FUNCTIONSYMBOLS = Dict{Type{<:Function},Instruction}(
261276
typeof(+) => :(+),
262277
typeof(SIMDPirates.vadd) => :(+),
278+
typeof(SIMDPirates.vadd!) => :(+),
263279
typeof(Base.FastMath.add_fast) => :(+),
264280
typeof(-) => :(-),
265281
typeof(SIMDPirates.vsub) => :(-),
282+
typeof(SIMDPirates.vsub!) => :(-),
266283
typeof(Base.FastMath.sub_fast) => :(-),
267284
typeof(*) => :(*),
268285
typeof(SIMDPirates.vmul) => :(*),
@@ -287,10 +304,21 @@ const FUNCTIONSYMBOLS = Dict{Type{<:Function},Instruction}(
287304
typeof(SIMDPirates.vfmsub) => :vfmsub,
288305
typeof(SIMDPirates.vfnmadd) => :vfnmadd,
289306
typeof(SIMDPirates.vfnmsub) => :vfnmsub,
307+
typeof(SIMDPirates.vfmadd!) => :vfmadd!,
308+
typeof(SIMDPirates.vfnmadd!) => :vfnmadd!,
290309
typeof(SIMDPirates.vfmadd_fast) => :vfmadd_fast,
291310
typeof(SIMDPirates.vfmsub_fast) => :vfmsub_fast,
292311
typeof(SIMDPirates.vfnmadd_fast) => :vfnmadd_fast,
293312
typeof(SIMDPirates.vfnmsub_fast) => :vfnmsub_fast,
313+
typeof(vfmaddaddone) => :vfmaddaddone,
314+
typeof(vmullog2) => :vmullog2,
315+
typeof(vmullog2add!) => :vmullog2add!,
316+
typeof(vmullog10) => :vmullog10,
317+
typeof(vmullog10add!) => :vmullog10add!,
318+
typeof(vdivlog2) => :vdivlog2,
319+
typeof(vdivlog2add!) => :vdivlog2add!,
320+
typeof(vdivlog10) => :vdivlog10,
321+
typeof(vdivlog10add!) => :vdivlog10add!,
294322
typeof(sqrt) => :sqrt,
295323
typeof(Base.FastMath.sqrt_fast) => :sqrt,
296324
typeof(SIMDPirates.vsqrt) => :sqrt,

src/map.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
function vmap_quote(N, ::Type{T}) where {T}
44
W, Wshift = VectorizationBase.pick_vector_width_shift(T)
55
val = Expr(:call, Expr(:curly, :Val, W))
6-
q = Expr(:block, Expr(:(=), :M, Expr(:call, :length, :dest)), Expr(:(=), :vdest, Expr(:call, :vectorizable, :dest)), Expr(:(=), :m, 0))
6+
q = Expr(:block, Expr(:(=), :M, Expr(:call, :length, :dest)), Expr(:(=), :vdest, Expr(:call, :pointer, :dest)), Expr(:(=), :m, 0))
77
fcall = Expr(:call, :f)
88
loopbody = Expr(:block, Expr(:call, :vstore!, :vdest, fcall, :m), Expr(:(+=), :m, W))
99
fcallmask = Expr(:call, :f)
1010
bodymask = Expr(:block, Expr(:(=), :__mask__, Expr(:call, :mask, val, Expr(:call, :&, :M, W-1))), Expr(:call, :vstore!, :vdest, fcallmask, :m, :__mask__))
1111
for n ∈ 1:N
1212
arg_n = Symbol(:varg_,n)
13-
push!(q.args, Expr(:(=), arg_n, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__,Symbol(@__FILE__)), Expr(:call, :vectorizable, Expr(:ref, :args, n)))))
13+
push!(q.args, Expr(:(=), arg_n, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__,Symbol(@__FILE__)), Expr(:call, :pointer, Expr(:ref, :args, n)))))
1414
push!(fcall.args, Expr(:call, :vload, val, arg_n, :m))
1515
push!(fcallmask.args, Expr(:call, :vload, val, arg_n, :m, :__mask__))
1616
end

src/precompile.jl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ function _precompile_()
5858
precompile(Tuple{typeof(Base.Broadcast.broadcasted),Function,Array{Int64,3},LowDimArray{(false, true, true),Int64,3,Array{Int64,3}}})
5959
precompile(Tuple{typeof(Base.Broadcast.broadcasted),Function,Array{Int64,3},LowDimArray{(true, false, true),Int64,3,Array{Int64,3}}})
6060
precompile(Tuple{typeof(Base.Broadcast.broadcasted),Function,Array{Int64,3},LowDimArray{(true, true, false),Int64,3,Array{Int64,3}}})
61-
precompile(Tuple{typeof(Base.Broadcast.broadcasted),typeof(*ˡ),Array{Float64,2},Array{Float64,1}})
6261
precompile(Tuple{typeof(Base.Broadcast.broadcasted),typeof(*ˡ),Array{Int32,2},Array{Int32,1}})
6362
precompile(Tuple{typeof(Base.Broadcast.broadcasted),typeof(*ˡ),Array{Int64,2},Array{Int64,1}})
6463
precompile(Tuple{typeof(LoopVectorization._avx_loopset),Core.SimpleVector,Core.SimpleVector,Core.SimpleVector,Core.SimpleVector,NTuple{4,DataType}})
@@ -336,7 +335,6 @@ function _precompile_()
336335
precompile(Tuple{typeof(foreach),typeof(empty!),Array{Array{LoopVectorization.Operation,1},1}})
337336
precompile(Tuple{typeof(getindex),Type{LoopVectorization.ArrayRefStruct},LoopVectorization.ArrayRefStruct,LoopVectorization.ArrayRefStruct,LoopVectorization.ArrayRefStruct,LoopVectorization.ArrayRefStruct,Vararg{LoopVectorization.ArrayRefStruct,N} where N})
338337
precompile(Tuple{typeof(getindex),Type{LoopVectorization.ArrayRefStruct},LoopVectorization.ArrayRefStruct,LoopVectorization.ArrayRefStruct,LoopVectorization.ArrayRefStruct,LoopVectorization.ArrayRefStruct})
339-
precompile(Tuple{typeof(getindex),Type{LoopVectorization.ArrayRefStruct},LoopVectorization.ArrayRefStruct,LoopVectorization.ArrayRefStruct})
340338
precompile(Tuple{typeof(hash),LoopVectorization.Instruction,UInt64})
341339
precompile(Tuple{typeof(iterate),LoopVectorization.LoopOrders,Array{Int64,1}})
342340
precompile(Tuple{typeof(println),Base.GenericIOBuffer{Array{UInt8,1}},Array{LoopVectorization.Operation,1}})

0 commit comments

Comments
 (0)