Skip to content

Commit 937fffe

Browse files
committed
Some progress towards 1.6.
1 parent 70a632f commit 937fffe

File tree

6 files changed

+33
-22
lines changed

6 files changed

+33
-22
lines changed

src/add_compute.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,9 +328,9 @@ function add_pow!(
328328
return add_compute!(ls, var, :^, [xop, pop], elementbytes)
329329
end
330330
if pint == -1
331-
return add_compute!(ls, var, :vinv, [xop], elementbytes)
331+
return add_compute!(ls, var, :inv, [xop], elementbytes)
332332
elseif pint < 0
333-
xop = add_compute!(ls, gensym(:inverse), :vinv, [xop], elementbytes)
333+
xop = add_compute!(ls, gensym(:inverse), :inv, [xop], elementbytes)
334334
pint = - pint
335335
end
336336
if pint == 0

src/costs.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ const COST = Dict{Symbol,InstructionCost}(
183183
# Instruction(:ifelse) => InstructionCost(1, 0.5),
184184
:ifelse => InstructionCost(1, 0.5),
185185
:inv => InstructionCost(13,4.0,-2.0,1),
186-
:vinv => InstructionCost(13,4.0,-2.0,1),
186+
# :vinv => InstructionCost(13,4.0,-2.0,1),
187187
:muladd => InstructionCost(4,0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
188188
:fma => InstructionCost(4,0.5), # + and * will fuse into this, so much of the time they're not twice as expensive
189189
# :vmuladd => InstructionCost(4,0.5), # + and * will fuse into this, so much of the time they're not twice as expensive

src/loopstartstopmanager.jl

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvec
200200
loopsym = names(ls)[n]
201201
index = Expr(:tuple)
202202
found_loop_sym = false
203+
call = Expr(:call, lv(:pointerforcomparison))
203204
for i ∈ getindicesonly(ar)
204205
if i === loopsym
205206
found_loop_sym = true
@@ -214,14 +215,15 @@ function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvec
214215
else
215216
push!(index.args, staticexpr(stophint - sub))
216217
end
217-
ptr = vptr(ar)
218+
push!(call.args, vptr(ar))
218219
# return
219220
else
220221
push!(index.args, Expr(:call, lv(:Zero)))
221222
end
222223
end
223224
@assert found_loop_sym "Failed to find $loopsym"
224-
Expr(:call, lv(:pointerforcomparison), ptr, index)
225+
push!(call.args, index)
226+
call
225227
# @show ar, loopsym
226228
end
227229
function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool, stopsym)::Expr

src/reconstruct_loopset.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ function pushvarg!(ls::LoopSet, ar::ArrayReferenceMeta, i, name)
128128
end
129129
function add_mref!(
130130
ls::LoopSet, ar::ArrayReferenceMeta, i::Int, @nospecialize(_::Type{S}), name
131-
) where {T, N, C, B, R, X, O, S <: StridedPointer{T,N,C,B,R,X,O}}
131+
) where {T, N, C, B, R, X, O, S <: AbstractStridedPointer{T,N,C,B,R,X,O}}
132132
@assert B ≤ 0 "Batched arrays not supported yet."
133133
sp = ArrayInterface.rank_to_sortperm(R)
134134
# maybe no change needed? -- optimize common case
@@ -401,7 +401,7 @@ function sizeofeltypes(v, num_arrays)::Int
401401
Ttemp = typeeltype(v[i])
402402
if !VectorizationBase.SIMD_NATIVE_INTEGERS && Ttemp <: Integer # hack
403403
return VectorizationBase.REGISTER_SIZE
404-
end
404+
end
405405
T = promote_type(T, Ttemp)
406406
end
407407
sizeof(T)

test/gemv.jl

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -233,16 +233,21 @@ using Test
233233
mygemvavx_range!(y2, A, x)
234234
@test y1full ≈ y2full
235235

236-
Abit = A .> 0.5;
237-
fill!(y2, -9999); mygemv_avx!(y2, Abit, x);
238-
@test y2 ≈ Abit * x
239-
fill!(y2, -9999); mygemvavx!(y2, Abit, x);
240-
@test y2 ≈ Abit * x
241-
xbit = x .> 0.5;
242-
fill!(y2, -9999); mygemv_avx!(y2, A, xbit);
243-
@test y2 ≈ A * xbit
244-
fill!(y2, -9999); mygemvavx!(y2, A, xbit);
245-
@test y2 ≈ A * xbit
236+
let M = 56
237+
A = view(Afull, M .+ (1:M), K .+ (1:K)); A .= rand.(Ref(R));
238+
y1 = view(y1full, M .+ (1:M));
239+
y2 = view(y2full, M .+ (1:M));
240+
Abit = A .> 0.5;
241+
fill!(y2, -9999); mygemv_avx!(y2, Abit, x);
242+
@test y2 ≈ Abit * x
243+
fill!(y2, -9999); mygemvavx!(y2, Abit, x);
244+
@test y2 ≈ Abit * x
245+
xbit = x .> 0.5;
246+
fill!(y2, -9999); mygemv_avx!(y2, A, xbit);
247+
@test y2 ≈ A * xbit
248+
fill!(y2, -9999); mygemvavx!(y2, A, xbit);
249+
@test y2 ≈ A * xbit
250+
end
246251

247252
# Check for out of bounds stores
248253
fill!(y1, 0); fill!(y2, 0); @test y1full ≈ y2full

test/ifelsemasks.jl

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ T = Float32
3737
z
3838
end
3939

40-
function Bernoulli_logit(y::BitVector, α::AbstractVector{T}) where {T}
40+
function Bernoulli_logit(y, α::AbstractVector{T}) where {T}
4141
t = zero(promote_type(Float64,T))
4242
@inbounds for i ∈ eachindex(α)
4343
invOmP = 1 + exp(α[i])
@@ -47,7 +47,7 @@ T = Float32
4747
end
4848
t
4949
end
50-
function Bernoulli_logitavx(y::BitVector, α::AbstractVector{T}) where {T}
50+
function Bernoulli_logitavx(y, α::AbstractVector{T}) where {T}
5151
t = zero(T === Int32 ? Float32 : Float64)
5252
@avx for i ∈ eachindex(α)
5353
invOmP = 1 + exp(α[i])
@@ -57,7 +57,7 @@ T = Float32
5757
end
5858
t
5959
end
60-
function Bernoulli_logit_avx(y::BitVector, α::AbstractVector{T}) where {T}
60+
function Bernoulli_logit_avx(y, α::AbstractVector{T}) where {T}
6161
t = zero(T === Int32 ? Float32 : Float64)
6262
@_avx for i ∈ eachindex(α)
6363
invOmP = 1 + exp(α[i])
@@ -492,15 +492,19 @@ T = Float32
492492

493493

494494
a = rand(-10:10, 43);
495-
bit = a .> 0.5;
495+
bit = a .> 0.5; bool = copyto!(Vector{Bool}(undef, length(bit)), bit);
496496
t = Bernoulli_logit(bit, a);
497497
@test isapprox(t, Bernoulli_logitavx(bit, a), atol = Int === Int32 ? 0.1 : 0)
498498
@test isapprox(t, Bernoulli_logit_avx(bit, a), atol = Int === Int32 ? 0.1 : 0)
499+
@test isapprox(t, Bernoulli_logitavx(bool, a), atol = Int === Int32 ? 0.1 : 0)
500+
@test isapprox(t, Bernoulli_logit_avx(bool, a), atol = Int === Int32 ? 0.1 : 0)
499501
a = rand(43);
500-
bit = a .> 0.5;
502+
bit = a .> 0.5; bool = copyto!(Vector{Bool}(undef, length(bit)), bit);
501503
t = Bernoulli_logit(bit, a);
502504
@test t ≈ Bernoulli_logitavx(bit, a)
503505
@test t ≈ Bernoulli_logit_avx(bit, a)
506+
@test t ≈ Bernoulli_logitavx(bool, a)
507+
@test t ≈ Bernoulli_logit_avx(bool, a)
504508

505509
ai = [rand(Bool) for _ in 1:71];
506510
bi = [rand(Bool) for _ in 1:71];

0 commit comments

Comments
 (0)