Skip to content

Commit 832c95c

Browse files
committed
Set initial pointer value for correct comparisons when they may be used to determine loop bounds. Fixes #290
1 parent fb9b868 commit 832c95c

File tree

3 files changed

+63
-2
lines changed

3 files changed

+63
-2
lines changed

src/codegen/loopstartstopmanager.jl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,12 @@ function pushgespind!(
428428
if index_by_index
429429
if gespsymbol === Symbol("")
430430
if constoffset == 0
431-
ns = Expr(:call, GlobalRef(VectorizationBase, :NullStep))
431+
nostep = if fromgsp | (!index_by_index)
432+
GlobalRef(VectorizationBase, :NullStep)
433+
else
434+
GlobalRef(ArrayInterface.Static, :Zero)
435+
end
436+
ns = Expr(:call, nostep)
432437
if fromgsp
433438
loop = getloop(ls, ind)
434439
if loop.rangesym ≢ Symbol("")

src/precompile.jl

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
function _precompile_()
22
ccall(:jl_generating_output, Cint, ()) == 1 || return nothing
33
# Base.precompile(Tuple{typeof(which(_turbo_!,(Val{UNROLL},Val{OPS},Val{ARF},Val{AM},Val{LPSYM},Tuple{LB, V},)).generator.gen),Any,Any,Any,Any,Any,Any,Any,Any,Type,Type,Type,Type,Any,Any}) # time: 1.0198073
4-
# Base.precompile(Tuple{typeof(gespf1),Any,Tuple{Any, VectorizationBase.NullStep}}) # time: 0.1096832
4+
# Base.precompile(Tuple{typeof(gespf1),Any,Tuple{Any, VectorizationBase.NullStep}}) # time: 0.1096832
5+
Base.precompile(avx_body, (LoopSet, Tuple{Bool,Int8,Int8,Bool,Int,Int,Int,Int,Int,Int,Int,UInt}))
6+
Base.precompile(lower_and_split_loops, (LoopSet, Int))
7+
Base.precompile(lower, (LoopSet, Int, Int, Int))
58
Base.precompile(Tuple{typeof(turbo_macro),Module,LineNumberNode,Expr}) # time: 0.09183489
69
Base.precompile(Tuple{typeof(gespf1),StridedPointer{Float64, 1, 1, 0, (1,), Tuple{StaticInt{8}}, Tuple{StaticInt{1}}},Tuple{StaticInt{1}}}) # time: 0.05469272
710
Base.precompile(Tuple{typeof(zerorangestart),UnitRange{Int}}) # time: 0.04291692

test/iteration_bound_tests.jl

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
using OffsetArrays
12

23
@testset "Iteration Bound Tests" begin
34
function masktest_incr1_none1start!(y,x)
@@ -19,4 +20,56 @@
1920
@test y == x
2021
masktest_incr2_none1start!(y,x)
2122
@test y == x .+ ifelse.((axes(x,1) .≤ 20) .& iseven.(axes(x,1)), 2, 0)
23+
24+
25+
# issue #290
26+
function my_gemm_noturbo!(out, s::Matrix{UInt8}, V, srows, scols, Vcols, μ)
27+
k = srows >> 2
28+
rem = srows & 3
29+
fill!(out, 0)
30+
31+
for c in 1:Vcols
32+
for i in 1:scols
33+
for l in 1:k
34+
block = s[l, i]
35+
for p in 1:4
36+
Aij = (block >> (2 * (p - 1))) & 3
37+
out[i, c] += (((Aij >= 2) + (Aij == 3) + (Aij == 1) * μ[i]) *
38+
V[4 * (l - 1) + p, c])
39+
end
40+
end
41+
end
42+
end
43+
nothing
44+
end
45+
function my_gemm!(out, s::Matrix{UInt8}, V, srows, scols, Vcols, μ)
46+
k = srows >> 2
47+
rem = srows & 3
48+
fill!(out, 0)
49+
50+
@avx for c in 1:Vcols
51+
for i in 1:scols
52+
for l in 1:k
53+
block = s[l, i]
54+
for p in 1:4
55+
Aij = (block >> (2 * (p - 1))) & 3
56+
out[i, c] += (((Aij >= 2) + (Aij == 3) + (Aij == 1) * μ[i]) *
57+
V[4 * (l - 1) + p, c])
58+
end
59+
end
60+
end
61+
end
62+
nothing
63+
end
64+
65+
out_true = Matrix{Float64}(undef, 100, 100);
66+
out_test1 = similar(out_true);
67+
# out_test2 = zeros(100, 100)
68+
μ = rand(100);
69+
s = rand(UInt8, 100, 100);
70+
V = rand(400, 100);
71+
72+
my_gemm_noturbo!(out_true, s, V, 400, 100, 100, μ)
73+
my_gemm!(out_test1, s, V, 400, 100, 100, μ);
74+
@test out_true ≈ out_test1
2275
end

0 commit comments

Comments
 (0)