Skip to content

Commit e1cc0bd

Browse files
committed
Increment ind counter for constant offsets when tracked for shuffling. Fixes #209
1 parent 7b4f68e commit e1cc0bd

File tree

2 files changed

+52
-0
lines changed

2 files changed

+52
-0
lines changed

src/parse/memory_ops_common.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,7 @@ function array_reference_meta!(ls::LoopSet, array::Symbol, rawindices, elementby
384384
if ind isa Integer # subset
385385
if byterepresentable(ind)
386386
addconstindex!(indices, offsets, strides, loopedindex, ind)
387+
ninds += 1
387388
else
388389
vptrarray = subset_vptr!(ls, vptrarray, ninds, ind, indices, loopedindex, true)
389390
length(indices) == 0 && push!(indices, DISCONTIGUOUS)

test/shuffleloadstores.jl

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,47 @@ function cmatmul_array!(C::AbstractArray{T,3}, A::AbstractArray{T,3}, B::Abstrac
131131
end
132132
end
133133

134+
# Regression kernel for issue #209: the `@avx` loop nest below indexes every
# array with a literal leading index (1 or 2) followed by loop-dependent indices.
#
# `B`, `ϕ` and the freshly allocated `tmp` are each viewed through
# `reinterpret(reshape, Float64, …)`, which adds a leading dimension; only
# positions 1 and 2 of that dimension are touched here.
# NOTE(review): presumably the inputs are complex-valued (two Float64 per
# element) — confirm against the caller.
#
# `tmp` has size (G-1) × (2J+1)(H+1) and the element type of `ϕ`. Its second
# dimension is the flattened (jj, hh) pair, addressed as `jj + h_idx`.
# The store indices do not depend on `mm`, so every pass of the outer loop
# overwrites the same entries; the returned `tmp` holds the values from the
# last pass.
#
# `A` is accepted but never referenced in the body.
function issue209(M, G, J, H, A, B, ϕ)
135+
tmp = similar(ϕ, G-1, (2*J+1)*(H + 1))
136+
Bf = reinterpret(reshape, Float64, B)
137+
ϕf = reinterpret(reshape, Float64, ϕ)
138+
tmpf = reinterpret(reshape, Float64, tmp)
139+
jmax = 2*J + 1
140+
for mm = 1:M
141+
m_idx = M + 2 - mm  # walks the last dimension downward: M+1, M, …, 2
142+
@avx for hh = 1:H+1
143+
h_idx = (hh - 1)*jmax  # offset of slab `hh` within tmp's flattened second dimension
144+
for jj = 1:jmax, gg = 1:G-1
145+
tmpf[1, gg, jj + h_idx] = ϕf[1, jj, gg+1, hh, m_idx] +
146+
Bf[1, jj, gg, hh, m_idx]
147+
tmpf[2, gg, jj + h_idx] = ϕf[2, jj, gg+1, hh, m_idx] +
148+
Bf[2, jj, gg, hh, m_idx]
149+
end
150+
end
151+
end
152+
tmp
153+
end
154+
# Reference implementation for `issue209`: the same allocation, reinterpret
# views and loop nest, but with no `@avx` on the `hh` loop, so it runs as plain
# Julia loops. Its result is what the `@avx` version is compared against.
#
# As in `issue209`, the store indices do not depend on `mm`, so each outer pass
# overwrites the same entries of `tmp`, and `A` is accepted but never
# referenced in the body.
function issue209_noavx(M, G, J, H, A, B, ϕ)
155+
tmp = similar(ϕ, G-1, (2*J+1)*(H + 1))
156+
Bf = reinterpret(reshape, Float64, B)
157+
ϕf = reinterpret(reshape, Float64, ϕ)
158+
tmpf = reinterpret(reshape, Float64, tmp)
159+
jmax = 2*J + 1
160+
for mm = 1:M
161+
m_idx = M + 2 - mm  # walks the last dimension downward: M+1, M, …, 2
162+
for hh = 1:H+1
163+
h_idx = (hh - 1)*jmax  # offset of slab `hh` within tmp's flattened second dimension
164+
for jj = 1:jmax, gg = 1:G-1
165+
tmpf[1, gg, jj + h_idx] = ϕf[1, jj, gg+1, hh, m_idx] +
166+
Bf[1, jj, gg, hh, m_idx]
167+
tmpf[2, gg, jj + h_idx] = ϕf[2, jj, gg+1, hh, m_idx] +
168+
Bf[2, jj, gg, hh, m_idx]
169+
end
170+
end
171+
end
172+
tmp
173+
end
174+
134175
@testset "shuffles load/stores" begin
135176
for i ∈ 1:128
136177
ac = rand(Complex{Float64}, i);
@@ -168,5 +209,15 @@ end
168209
@test Cc1 ≈ Cc2# ≈ Cc3
169210
end
170211
end
212+
M = 100
213+
G = 50
214+
J = 50
215+
H = 300
216+
217+
A = Matrix(Tridiagonal(rand(G-1,G-1)));
218+
B = rand(Complex{Float64}, 2*J+1, G-1, H+1, M+1);
219+
ϕ = rand(Complex{Float64}, 2*J+1, G+1, H+1, M+1);
220+
@test issue209(M, G, J, H, A, B, ϕ) ≈ issue209_noavx(M, G, J, H, A, B, ϕ)
171221
end
172222

223+

0 commit comments

Comments
 (0)