Skip to content

Commit e4fa49e

Browse files
committed
Library can be loaded.
1 parent 03a49f4 commit e4fa49e

13 files changed

+293
-105
lines changed

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ authors = ["Chris Elrod <[email protected]>"]
44
version = "0.8.26"
55

66
[deps]
7+
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
78
DocStringExtensions = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
89
IfElse = "615f187c-cbe4-4ef1-ba3b-2fcf58d6d173"
910
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
@@ -13,6 +14,7 @@ UnPack = "3a884ed6-31ef-47d7-9d2a-63182c4928ed"
1314
VectorizationBase = "3d5dd08c-fd9d-11e8-17fa-ed2836048c2f"
1415

1516
[compat]
17+
ArrayInterface = "2.13.7"
1618
DocStringExtensions = "0.8"
1719
IfElse = "0"
1820
OffsetArrays = "1"

src/LoopVectorization.jl

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ module LoopVectorization
55
# end
66

77
using VectorizationBase, SLEEFPirates, UnPack, OffsetArrays
8-
using VectorizationBase: REGISTER_SIZE, data,
8+
using VectorizationBase: REGISTER_SIZE, REGISTER_COUNT, data,
99
mask, pick_vector_width_val, MM,
1010
maybestaticlength, maybestaticsize, staticm1, staticp1, staticmul, vzero,
1111
Zero, maybestaticrange, offsetprecalc,
12-
maybestaticfirst, maybestaticlast, scalar_less, gesp, pointerforcomparison, NativeTypes, staticmul,
13-
relu
12+
maybestaticfirst, maybestaticlast, scalar_less, gep, gesp, pointerforcomparison, NativeTypes, staticmul,
13+
relu, stridedpointer, StridedPointer
1414
using IfElse: ifelse
1515

1616
const Static = StaticInt
@@ -29,25 +29,21 @@ import LinearAlgebra # for check_args
2929

3030
using Base.FastMath: add_fast, sub_fast, mul_fast, div_fast
3131

32+
using ArrayInterface
33+
using ArrayInterface: OptionallyStaticUnitRange, Zero
34+
const Static = ArrayInterface.StaticInt
35+
3236
export LowDimArray, stridedpointer,
3337
@avx, @_avx, *ˡ, _avx_!,
3438
vmap, vmap!, vmapt, vmapt!, vmapnt, vmapnt!, vmapntt, vmapntt!,
3539
vfilter, vfilter!, vmapreduce, vreduce
3640

37-
const VECTORWIDTHSYMBOL, ELTYPESYMBOL = Symbol("##Wvecwidth##"), Symbol("##Tloopeltype##")
3841

39-
"""
40-
REGISTER_COUNT defined in VectorizationBase is supposed to correspond to the actual number of floating point registers on the system.
41-
It is hardcoded into a file at build time.
42-
However, someone may have multiple builds of Julia on the same system, some 32-bit and some 64-bit (e.g., they use 64-bit primarily,
43-
but keep a 32-bit build on hand to debug test failures on Appveyor's 32-bit build). Thus, we don't want REGISTER_COUNT to be hardcoded
44-
in such a fashion.
45-
32-bit builds are limited to only 8 floating point registers, so we take care of that here.
42+
const VECTORWIDTHSYMBOL, ELTYPESYMBOL = Symbol("##Wvecwidth##"), Symbol("##Tloopeltype##")
4643

47-
If you want good performance, DO NOT use a 32-bit build of Julia if you don't have to.
48-
"""
49-
const REGISTER_COUNT = Sys.ARCH === :i686 ? 8 : VectorizationBase.REGISTER_COUNT
5044

45+
include("vectorizationbase_compat/contract_pass.jl")
46+
include("vectorizationbase_compat/subsetview.jl")
5147
include("getconstindexes.jl")
5248
# include("vectorizationbase_extensions.jl")
5349
include("predicates.jl")

src/condense_loopset.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,11 @@ function loop_boundary(loop::Loop)
126126
startexact = loop.startexact
127127
stopexact = loop.stopexact
128128
if startexact & stopexact
129-
Expr(:call, Expr(:curly, lv(:StaticUnitRange), loop.starthint, loop.stophint))
129+
Expr(:call, lv(:OptionallyStaticUnitRange), staticexpr(loop.starthint), staticexpr(loop.stophint))
130130
elseif startexact
131-
Expr(:call, Expr(:curly, lv(:StaticLowerUnitRange), loop.starthint), loop.stopsym)
131+
Expr(:call, lv(:OptionallyStaticUnitRange), staticexpr(loop.starthint), loop.stopsym)
132132
elseif stopexact
133-
Expr(:call, Expr(:curly, lv(:StaticUpperUnitRange), loop.stophint), loop.startsym)
133+
Expr(:call, lv(:OptionallyStaticUnitRange), loop.startsym, staticexpr(loop.stophint))
134134
else
135135
Expr(:call, :(:), loop.startsym, loop.stopsym)
136136
end

src/constructors.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ end
5757

5858

5959
function LoopSet(q::Expr, mod::Symbol = :Main)
60-
SIMDPirates.contract_pass!(q)
60+
contract_pass!(q)
6161
ls = LoopSet(mod)
6262
copyto!(ls, q)
6363
resize!(ls.loop_order, num_loops(ls))
@@ -136,7 +136,7 @@ and `uᵢ=-1` disables unrolling for the corresponding loop.
136136
137137
The `@avx` macro also checks the array arguments using `LoopVectorization.check_args` to try and determine
138138
if they are compatible with the macro. If `check_args` returns false, a fall back loop annotated with `@inbounds`
139-
and `@fastmath` is generated. Note that `SIMDPirates` provides functions such as `evadd` and `evmul` that will
139+
and `@fastmath` is generated. Note that `VectorizationBase` provides functions such as `vadd` and `vmul` that will
140140
ignore `@fastmath`, preserving IEEE semantics both within `@avx` and `@fastmath`.
141141
`check_args` currently returns false for some wrapper types like `LinearAlgebra.UpperTriangular`, requiring you to
142142
use their `parent`. Triangular loops aren't yet supported.

src/determinestrategy.jl

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11

2+
const CACHELINE_SIZE = VectorizationBase.L₁CACHE.linesize
3+
24
# function indexappearences(op::Operation, s::Symbol)
35
# s ∉ loopdependencies(op) && return 0
46
# appearences = 0
@@ -80,7 +82,7 @@ function cost(ls::LoopSet, op::Operation, vectorized::Symbol, Wshift::Int, size_
8082
# would be nice to add a check for this CPU, to see if such a penalty is still appropriate.
8183
# Also, once more SVE (scalable vector extension) CPUs are released, would be nice to know if
8284
# this feature is common to all of them.
83-
srt += 0.5VectorizationBase.REGISTER_SIZE / VectorizationBase.CACHELINE_SIZE
85+
srt += 0.5VectorizationBase.REGISTER_SIZE / CACHELINE_SIZE
8486
end
8587
elseif isstore(op) # broadcast or reductionstore; if store we want to penalize reduction
8688
srt *= 3
@@ -469,9 +471,9 @@ function solve_unroll(
469471
W::Int, vectorized::Symbol, rounduᵢ::Int
470472
)
471473
(u₁step, u₂step) = if rounduᵢ == 1 # max is to safeguard against some weird arch I've never heard of.
472-
(max(1,VectorizationBase.CACHELINE_SIZE ÷ VectorizationBase.REGISTER_SIZE), 1)
474+
(max(1,CACHELINE_SIZE ÷ VectorizationBase.REGISTER_SIZE), 1)
473475
elseif rounduᵢ == 2
474-
(1, max(1,VectorizationBase.CACHELINE_SIZE ÷ VectorizationBase.REGISTER_SIZE))
476+
(1, max(1,CACHELINE_SIZE ÷ VectorizationBase.REGISTER_SIZE))
475477
else
476478
(1, 1)
477479
end
@@ -887,7 +889,7 @@ function evaluate_cost_tile(
887889
rt, lat, rp = cost(ls, op, vectorized, Wshift, size_T)
888890
if isload(op)
889891
if !iszero(prefetchisagoodidea(ls, op, UnrollArgs(4, unrollsyms, 4, 0)))
890-
# rt += 0.5VectorizationBase.REGISTER_SIZE / VectorizationBase.CACHELINE_SIZE
892+
# rt += 0.5VectorizationBase.REGISTER_SIZE / CACHELINE_SIZE
891893
prefetch_good_idea = true
892894
end
893895
end

src/filter.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@ if (Base.libllvm_version ≥ v"7" && VectorizationBase.AVX512F) || Base.libllvm_
1212
for _ ∈ 1:Nrep
1313
vy = vload(Vec{W,T}, ptr_y)
1414
mask = f(Vec(vy))
15-
SIMDPirates.compressstore!(gep(ptr_x, j), vy, mask)
15+
VectorizationBase.compressstore!(gep(ptr_x, j), vy, mask)
1616
ptr_y = gepbyte(ptr_y, VectorizationBase.REGISTER_SIZE)
1717
j = vadd(j, count_ones(mask))
1818
end
1919
rem_mask = VectorizationBase.mask(T, Nrem)
2020
vy = vload(Vec{W,T}, ptr_y, rem_mask)
2121
mask = rem_mask & f(Vec(vy))
22-
SIMDPirates.compressstore!(gep(ptr_x, j), vy, mask)
22+
VectorizationBase.compressstore!(gep(ptr_x, j), vy, mask)
2323
j = vadd(j, count_ones(mask))
2424
Base._deleteend!(x, N-j) # resize!(x, j)
2525
end

src/lower_constant.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,14 @@ function lower_constant!(
7070
call = if reducedchildvectorized && vectorized ∉ loopdependencies(op)
7171
instrclass = getparentsreductzero(ls, op)
7272
if instrclass == ADDITIVE_IN_REDUCTIONS
73-
Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:SIMDPirates)), QuoteNode(:addscalar)), Expr(:call, lv(:vzero), VECTORWIDTHSYMBOL, ELTYPESYMBOL), constsym)
73+
Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:VectorizationBase)), QuoteNode(:addscalar)), Expr(:call, lv(:vzero), VECTORWIDTHSYMBOL, ELTYPESYMBOL), constsym)
7474
elseif instrclass == MULTIPLICATIVE_IN_REDUCTIONS
75-
Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:SIMDPirates)), QuoteNode(:mulscalar)), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :one, ELTYPESYMBOL)), constsym)
75+
Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:VectorizationBase)), QuoteNode(:mulscalar)), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :one, ELTYPESYMBOL)), constsym)
7676
elseif instrclass == MAX
77-
Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:SIMDPirates)), QuoteNode(:maxscalar)), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :typemin, ELTYPESYMBOL)), constsym)
77+
Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:VectorizationBase)), QuoteNode(:maxscalar)), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :typemin, ELTYPESYMBOL)), constsym)
7878

7979
elseif instrclass == MIN
80-
Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:SIMDPirates)), QuoteNode(:minscalar)), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :typemax, ELTYPESYMBOL)), constsym)
80+
Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:VectorizationBase)), QuoteNode(:minscalar)), Expr(:call, lv(:vbroadcast), VECTORWIDTHSYMBOL, Expr(:call, :typemax, ELTYPESYMBOL)), constsym)
8181

8282
else
8383
throw("Reductions of type $(reduction_zero(reinstrclass)) not yet supported; please file an issue as a reminder to take care of this.")

src/lower_load.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ function prefetchisagoodidea(ls::LoopSet, op::Operation, td::UnrollArgs)
7070
length(loopdependencies(op)) ≤ 1 && return 0
7171
vectorized ∈ loopdependencies(op) || return 0
7272
u₂loopsym === Symbol("##undefined##") && return 0
73-
dontskip = (VectorizationBase.CACHELINE_SIZE ÷ VectorizationBase.REGISTER_SIZE) - 1
73+
dontskip = (CACHELINE_SIZE ÷ VectorizationBase.REGISTER_SIZE) - 1
7474
# u₂loopsym is vectorized
7575
# u₁vectorized = vectorized === u₁loopsym
7676
u₂vectorized = vectorized === u₂loopsym

src/lower_store.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@ using VectorizationBase: vnoaliasstore!
44
@inline vstoreadditivereduce!(args...) = vnoaliasstore!(args...)
55
@inline vstoremultiplicativevereduce!(args...) = vnoaliasstore!(args...)
66
@inline function vstoreadditivereduce!(ptr::VectorizationBase.AbstractStridedPointer, v::VectorizationBase.Vec, i::Tuple{Vararg{Union{Integer,Static}}})
7-
vnoaliasstore!(ptr, SIMDPirates.vsum(v), i)
7+
vnoaliasstore!(ptr, VectorizationBase.vsum(v), i)
88
end
99
@inline function vstoreadditivereduce!(ptr::VectorizationBase.AbstractStridedPointer, v::VectorizationBase.Vec, i::Tuple{Vararg{Union{Integer,Static}}}, m::VectorizationBase.Mask)
10-
vnoaliasstore!(ptr, SIMDPirates.vsum(v), i, m)
10+
vnoaliasstore!(ptr, VectorizationBase.vsum(v), i, m)
1111
end
1212
@inline function vstoremultiplicativevereduce!(ptr::VectorizationBase.AbstractStridedPointer, v::VectorizationBase.Vec, i::Tuple{Vararg{Union{Integer,Static}}})
13-
vnoaliasstore!(ptr, SIMDPirates.vprod(v), i)
13+
vnoaliasstore!(ptr, VectorizationBase.vprod(v), i)
1414
end
1515
@inline function vstoremultiplicativevereduce!(ptr::VectorizationBase.AbstractStridedPointer, v::VectorizationBase.Vec, i::Tuple{Vararg{Union{Integer,Static}}}, m::VectorizationBase.Mask)
16-
vnoaliasstore!(ptr, SIMDPirates.vprod(v), i, m)
16+
vnoaliasstore!(ptr, VectorizationBase.vprod(v), i, m)
1717
end
1818

1919
function storeinstr(op::Operation, vectorized::Symbol)

src/lowering.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -229,12 +229,12 @@ end
229229
# end
230230

231231
function assume(ex)
232-
Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:SIMDPirates)), QuoteNode(:assume)), ex)
232+
Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:VectorizationBase)), QuoteNode(:assume)), ex)
233233
end
234234
function expect(ex)
235-
use_expect() || return ex
236-
Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:SIMDPirates)), QuoteNode(:expect)), ex)
237-
# ex
235+
# use_expect() || return ex
236+
# Expr(:call, Expr(:(.), Expr(:(.), :LoopVectorization, QuoteNode(:VectorizationBase)), QuoteNode(:expect)), ex)
237+
ex
238238
end
239239
function loopiteratesatleastonce(loop::Loop, as::Bool = true)
240240
comp = if loop.startexact # requires !loop.stopexact

0 commit comments

Comments
 (0)