Skip to content

Commit 93d2be2

Browse files
authored
Pass arguments to _avx_! in registers. (#261)
* Pass arguments to _avx_! in registers. * Use `new` to bypass inner constructors * VectorizationBase 0.20
1 parent 3ef16fb commit 93d2be2

File tree

15 files changed

+271
-195
lines changed

15 files changed

+271
-195
lines changed

Project.toml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "LoopVectorization"
22
uuid = "bdcacae8-1622-11e9-2a5c-532679323890"
33
authors = ["Chris Elrod <[email protected]>"]
4-
version = "0.12.18"
4+
version = "0.12.19"
55

66
[deps]
77
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
@@ -20,17 +20,17 @@ VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
2020

2121
[compat]
2222
ArrayInterface = "3.1.9"
23-
CheapThreads = "0.1.3,0.2"
23+
CheapThreads = "0.2"
2424
DocStringExtensions = "0.8"
2525
IfElse = "0.1"
26-
OffsetArrays = "1.4.1, 1.5"
26+
OffsetArrays = "1.4.1"
2727
Requires = "1"
28-
SLEEFPirates = "0.6.14"
28+
SLEEFPirates = "0.6.18"
2929
Static = "0.2"
3030
StrideArraysCore = "0.1.5"
31-
ThreadingUtilities = "0.4.1"
31+
ThreadingUtilities = "0.4.2"
3232
UnPack = "1"
33-
VectorizationBase = "0.19.36"
33+
VectorizationBase = "0.20.1"
3434
julia = "1.5"
3535

3636
[extras]

src/LoopVectorization.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@ module LoopVectorization
33
using Static: StaticInt, gt
44
using VectorizationBase, SLEEFPirates, UnPack, OffsetArrays
55
using VectorizationBase: register_size, register_count, cache_linesize, cache_size, has_opmask_registers,
6-
mask, pick_vector_width, MM, AbstractMask, data, grouped_strided_pointer,
7-
maybestaticlength, maybestaticsize, staticm1, staticp1, staticmul, vzero,
8-
maybestaticrange, offsetprecalc, lazymul,
9-
maybestaticfirst, maybestaticlast, scalar_less, scalar_greaterequal, gep, gesp, NativeTypes, #llvmptr,
6+
mask, pick_vector_width, MM, AbstractMask, data, grouped_strided_pointer,
7+
maybestaticlength, maybestaticsize, vzero, maybestaticrange, offsetprecalc, lazymul,
8+
vadd_nw, vadd_nsw, vadd_nuw, vsub_nw, vsub_nsw, vsub_nuw, vmul_nw, vmul_nsw, vmul_nuw,
9+
maybestaticfirst, maybestaticlast, gep, gesp, NativeTypes, #llvmptr,
1010
vfmadd, vfmsub, vfnmadd, vfnmsub, vfmadd_fast, vfmsub_fast, vfnmadd_fast, vfnmsub_fast, vfmadd231, vfmsub231, vfnmadd231, vfnmsub231,
1111
vfma_fast, vmuladd_fast, vdiv_fast, vadd_fast, vsub_fast, vmul_fast,
1212
relu, stridedpointer, stridedpointer_preserve, StridedPointer, StridedBitPointer, AbstractStridedPointer, _vload, _vstore!,

src/codegen/loopstartstopmanager.jl

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -679,9 +679,9 @@ function pointermax_index(
679679
else
680680
_ind = if isvectorized
681681
if isone(sub)
682-
Expr(:call, lv(:vsub_fast), staticexpr(stophint), VECTORWIDTHSYMBOL)
682+
Expr(:call, lv(:vsub_nsw), staticexpr(stophint), VECTORWIDTHSYMBOL)
683683
else
684-
Expr(:call, lv(:vsub_fast), staticexpr(stophint), mulexpr(VECTORWIDTHSYMBOL, sub))
684+
Expr(:call, lv(:vsub_nsw), staticexpr(stophint), mulexpr(VECTORWIDTHSYMBOL, sub))
685685
end
686686
else
687687
staticexpr(stophint - sub)
@@ -717,9 +717,9 @@ function pointermax_index(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int,
717717
else
718718
_ind = if isvectorized
719719
if isone(sub)
720-
Expr(:call, lv(:vsub_fast), stopsym, VECTORWIDTHSYMBOL)
720+
Expr(:call, lv(:vsub_nsw), stopsym, VECTORWIDTHSYMBOL)
721721
else
722-
Expr(:call, lv(:vsub_fast), stopsym, mulexpr(VECTORWIDTHSYMBOL, sub))
722+
Expr(:call, lv(:vsub_nsw), stopsym, mulexpr(VECTORWIDTHSYMBOL, sub))
723723
end
724724
else
725725
subexpr(stopsym, sub)
@@ -817,25 +817,26 @@ function maxunroll(us::UnrollSpecification, n)
817817
end
818818

819819

820-
function startloop(ls::LoopSet, us::UnrollSpecification, n::Int, submax = maxunroll(us, n))
821-
@unpack u₁loopnum, u₂loopnum, vloopnum, u₁, u₂ = us
822-
lssm = ls.lssm
823-
termind = lssm.terminators[n]
824-
ptrdefs = lssm.incrementedptrs[n]
825-
loopstart = Expr(:block)
826-
firstloop = n == num_loops(ls)
827-
for ar ∈ ptrdefs
828-
ptr_offset = vptr_offset(ar)
829-
push!(loopstart.args, Expr(:(=), ptr_offset, ptr_offset))
830-
end
831-
if iszero(termind)
832-
loopsym = names(ls)[n]
833-
push!(loopstart.args, startloop(getloop(ls, loopsym), loopsym))
834-
else
835-
isvectorized = n == vloopnum
836-
append_pointer_maxes!(loopstart, ls, ptrdefs[termind], n, submax, isvectorized)
837-
end
838-
loopstart
820+
function startloop(ls::LoopSet, us::UnrollSpecification, n::Int, staticinit::Bool = false)
821+
@unpack u₁loopnum, u₂loopnum, vloopnum, u₁, u₂ = us
822+
lssm = ls.lssm
823+
termind = lssm.terminators[n]
824+
ptrdefs = lssm.incrementedptrs[n]
825+
loopstart = Expr(:block)
826+
firstloop = n == num_loops(ls)
827+
for ar ∈ ptrdefs
828+
ptr_offset = vptr_offset(ar)
829+
push!(loopstart.args, Expr(:(=), ptr_offset, ptr_offset))
830+
end
831+
if iszero(termind)
832+
loopsym = names(ls)[n]
833+
push!(loopstart.args, startloop(getloop(ls, loopsym), loopsym, staticinit))
834+
else
835+
isvectorized = n == vloopnum
836+
submax = maxunroll(us, n)
837+
append_pointer_maxes!(loopstart, ls, ptrdefs[termind], n, submax, isvectorized)
838+
end
839+
loopstart
839840
end
840841
function offset_ptr(
841842
ar::ArrayReferenceMeta, us::UnrollSpecification, loopsym::Symbol, n::Int, UF::Int, offsetinds::Vector{Bool}, loop::Loop

src/codegen/lower_compute.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,15 +69,15 @@ function _add_loopvalue!(ex::Expr, loopval::Symbol, vloop::Loop, u::Int)
6969
else
7070
mm = _MMind(loopval, step(vloop))
7171
if isone(u)
72-
push!(ex.args, Expr(:call, lv(:vadd_fast), VECTORWIDTHSYMBOL, mm))
72+
push!(ex.args, Expr(:call, lv(:vadd_nsw), VECTORWIDTHSYMBOL, mm))
7373
else
74-
push!(ex.args, Expr(:call, lv(:vadd_fast), Expr(:call, lv(:vmul_fast), VECTORWIDTHSYMBOL, u), mm))
74+
push!(ex.args, Expr(:call, lv(:vadd_nsw), Expr(:call, lv(:vmul_nsw), VECTORWIDTHSYMBOL, u), mm))
7575
end
7676
end
7777
elseif u == 0
7878
push!(ex.args, loopval)
7979
else
80-
push!(ex.args, Expr(:call, lv(:vadd_fast), loopval, staticexpr(u)))
80+
push!(ex.args, Expr(:call, lv(:vadd_nsw), loopval, staticexpr(u)))
8181
end
8282
end
8383
function add_loopvalue!(instrcall::Expr, loopval, ua::UnrollArgs, u₁::Int)

0 commit comments

Comments
 (0)