Skip to content

Commit 91ae6bd

Browse files
committed
CSE some more strides-multiple calculations.
1 parent b12cd7a commit 91ae6bd

File tree

3 files changed

+26
-11
lines changed

3 files changed

+26
-11
lines changed

src/LoopVectorization.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ using VectorizationBase: REGISTER_SIZE, extract_data, num_vector_load_expr,
1010
maybestaticlength, maybestaticsize, staticm1, staticp1, staticmul, subsetview, vzero, stridedpointer_for_broadcast,
1111
Static, Zero, StaticUnitRange, StaticLowerUnitRange, StaticUpperUnitRange, unwrap, maybestaticrange,
1212
AbstractColumnMajorStridedPointer, AbstractRowMajorStridedPointer, AbstractSparseStridedPointer, AbstractStaticStridedPointer,
13-
PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct,
13+
PackedStridedPointer, SparseStridedPointer, RowMajorStridedPointer, StaticStridedPointer, StaticStridedStruct, offsetprecalc,
1414
maybestaticfirst, maybestaticlast, scalar_less, scalar_greater, noalias!, gesp, gepbyte, pointerforcomparison, NativeTypes, staticmul, staticmuladd
1515
using SIMDPirates: VECTOR_SYMBOLS, evadd, evsub, evmul, evfdiv, vrange,
1616
reduced_add, reduced_prod, reduce_to_add, reduced_max, reduced_min, vsum, vprod, vmaximum, vminimum,

src/loopstartstopmanager.jl

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@ function use_loop_induct_var!(ls::LoopSet, q::Expr, ar::ArrayReferenceMeta, alla
6666
end
6767
isbroadcast = ls.isbroadcast[]
6868
gespinds = Expr(:tuple)
69+
offsetprecalc_descript = Expr(:tuple)
70+
use_offsetprecalc = false
6971
for i ∈ eachindex(li)
7072
ii = i + offset
7173
ind = indices[ii]
@@ -78,9 +80,12 @@ function use_loop_induct_var!(ls::LoopSet, q::Expr, ar::ArrayReferenceMeta, alla
7880
if (!li[i])
7981
uliv[i] = 0
8082
push!(gespinds.args, Expr(:call, lv(:Zero)))
83+
push!(offsetprecalc_descript.args, 0)
8184
elseif isbroadcast || ((isone(ii) && (last(looporder) === ind)) && !(otherindexunrolled(ls, ind, ar)) || multiple_with_name(vptr(ar), allarrayrefs)) || isstaticloop(getloop(ls, ind))
85+
# Not doing normal offset indexing
8286
uliv[i] = -findfirst(isequal(ind), looporder)::Int
8387
push!(gespinds.args, Expr(:call, lv(:Zero)))
88+
push!(offsetprecalc_descript.args, 0) # not doing offset indexing, so push 0
8489
else
8590
uliv[i] = findfirst(isequal(ind), looporder)::Int
8691
loop = getloop(ls, ind)
@@ -89,9 +94,24 @@ function use_loop_induct_var!(ls::LoopSet, q::Expr, ar::ArrayReferenceMeta, alla
8994
else
9095
push!(gespinds.args, Expr(:call, lv(:staticm1), loop.startsym))
9196
end
97+
if ind === names(ls)[us.vectorizedloopnum]
98+
push!(offsetprecalc_descript.args, 0)
99+
elseif (ind === names(ls)[us.u₁loopnum]) & (us.u₁ > 3)
100+
use_offsetprecalc = true
101+
push!(offsetprecalc_descript.args, us.u₁)
102+
elseif (ind === names(ls)[us.u₂loopnum]) & (us.u₂ > 3)
103+
use_offsetprecalc = true
104+
push!(offsetprecalc_descript.args, us.u₂)
105+
else
106+
push!(offsetprecalc_descript.args, 0)
107+
end
92108
end
93109
end
94-
push!(q.args, Expr(:(=), vptr(ar), Expr(:call, lv(:gesp), vptr(ar), gespinds)))
110+
if use_offsetprecalc
111+
push!(q.args, Expr(:(=), vptr(ar), Expr(:call, lv(:offsetprecalc), Expr(:call, lv(:gesp), vptr(ar), gespinds), Expr(:call, Expr(:curly, :Val, offsetprecalc_descript)))))
112+
else
113+
push!(q.args, Expr(:(=), vptr(ar), Expr(:call, lv(:gesp), vptr(ar), gespinds)))
114+
end
95115
uliv
96116
end
97117

@@ -197,13 +217,8 @@ function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvec
197217
end
198218
@show ar, loopsym
199219
end
200-
# @inline function assume_a_greater_than_b_ret_a(a::Ptr, b::VectorizationBase.AbstractStridedPointer)
201-
# SIMDPirates.assume(pointer(b) < a)
202-
# a
203-
# end
204-
# @inline assume_a_greater_than_b_ret_a(a, b) = a
220+
205221
function defpointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool)::Expr
206-
# Expr(:(=), maxsym(vptr(ar), sub), Expr(:call, lv(:assume_a_greater_than_b_ret_a), pointermax(ls, ar, n, sub, isvectorized), vptr(ar)))
207222
Expr(:(=), maxsym(vptr(ar), sub), pointermax(ls, ar, n, sub, isvectorized))
208223
end
209224

src/vectorizationbase_extensions.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,9 @@ end
3232
# @inline VectorizationBase.offset(ptr::OffsetStridedPointer, ind::Tuple{I}) where {I} = VectorizationBase.offset(ptr.ptr, ind)
3333
# Tuple of length > 1, subtract offsets.
3434
# @inline VectorizationBase.offset(ptr::OffsetStridedPointer{<:Any,N}, ind::Tuple) where {N} = VectorizationBase.offset(ptr.ptr, ntuple(n -> ind[n] + ptr.offsets[n], Val{N}()))
35-
@inline VectorizationBase.offset(ptr::OffsetStridedPointer, ind::Tuple{I}) where {I} = ind
35+
@inline VectorizationBase.offset(ptr::OffsetStridedPointer, ind::Tuple{I}) where {I} = VectorizationBase.offset(ptr.ptr, ind)
3636
# Tuple of length > 1, subtract offsets.
37-
@inline VectorizationBase.offset(ptr::OffsetStridedPointer{<:Any,N}, ind::Tuple) where {N} = ntuple(n -> vsub(ind[n], ptr.offsets[n]), Val{N}())
37+
@inline VectorizationBase.offset(ptr::OffsetStridedPointer{<:Any,N}, ind::Tuple) where {N} = VectorizationBase.offset(ptr.ptr, ntuple(n -> vsub(ind[n], ptr.offsets[n]), Val{N}()))
3838
@inline Base.similar(p::OffsetStridedPointer, ptr::Ptr) = OffsetStridedPointer(similar(p.ptr, ptr), p.offsets)
3939
@inline Base.pointer(p::OffsetStridedPointer) = pointer(p.ptr)
4040
@inline VectorizationBase.gesp(p::OffsetStridedPointer, i) = similar(p.ptr, gep(p, i))
@@ -44,5 +44,5 @@ end
4444
subsetview(gesp(ptr.ptr, ntuple(n -> 0 - @inbounds(ptr.offsets[n]), Val{N}())), Val{I}(), i)
4545
end
4646

47-
@inline VectorizationBase.offset(ptr::OffsetStridedPointer{<:Any,<:Any,<:VectorizationBase.AbstractBitPointer}, ind::Tuple{I}) where {I} = (vsub(ind[1], ptr.offsets[1]),)
47+
@inline VectorizationBase.offset(ptr::OffsetStridedPointer{<:Any,<:Any,<:VectorizationBase.AbstractBitPointer}, ind::Tuple{I}) where {I} = VectorizationBase.offset(ptr.ptr, (vsub(ind[1], ptr.offsets[1]),))
4848
@inline VectorizationBase.gesp(ptr::VectorizationBase.AbstractBitPointer, i) = OffsetStridedPointer(ptr, vsub.(-1, unwrap.(i)))

0 commit comments

Comments
 (0)