Skip to content

Commit b352aba

Browse files
committed
Fix test failure
1 parent 3131e1c commit b352aba

File tree

4 files changed

+28
-43
lines changed

4 files changed

+28
-43
lines changed

src/codegen/lower_load.jl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ function add_prefetches!(q::Expr, ls::LoopSet, op::Operation, td::UnrollArgs, pr
6666
prefetchstride *= gethint(prefetchloop_step)
6767
end
6868
offsets[prefetchind] = inner_offset + prefetchstride
69-
gespinds = mem_offset_u(op, td, indices_calculated_by_pointer_offsets(ls, op.ref), false, 0, ls)
69+
gespinds = mem_offset_u(op, td, indices_calculated_by_pointer_offsets(ls, op.ref), false, 0, ls, false)
7070
offsets[prefetchind] = inner_offset
7171
ptr = vptr(op)
7272
gptr = Symbol(ptr, "##GESPEDPREFETCH##")
@@ -151,12 +151,13 @@ function lower_load_no_optranslation!(
151151
loadexpr = Expr(:call, lv(:_vload), sptr(op), inds)
152152
add_memory_mask!(loadexpr, op, td, mask, ls, 0)
153153
push!(loadexpr.args, falseexpr, rs) # unaligned load
154+
# @show op loadexpr
154155
push!(q.args, Expr(:(=), mvar, loadexpr))
155156
elseif (u₁ > 1) & opu₁
156157
t = Expr(:tuple)
157158
sptrsym = sptr!(q, op)
158159
for u ∈ 1:u₁
159-
inds = mem_offset_u(op, td, inds_calc_by_ptr_offset, true, u-1, ls)
160+
inds = mem_offset_u(op, td, inds_calc_by_ptr_offset, true, u-1, ls, false)
160161
loadexpr = Expr(:call, lv(:_vload), sptrsym, inds)
161162
domask = mask && (isvectorized(op) & ((u == u₁) | (vloopsym !== u₁loopsym)))
162163
add_memory_mask!(loadexpr, op, td, domask, ls, u)
@@ -166,7 +167,7 @@ function lower_load_no_optranslation!(
166167
end
167168
push!(q.args, Expr(:(=), mvar, Expr(:call, lv(:VecUnroll), t)))
168169
else
169-
inds = mem_offset_u(op, td, inds_calc_by_ptr_offset, true, 0, ls)
170+
inds = mem_offset_u(op, td, inds_calc_by_ptr_offset, true, 0, ls, false#= not unrolled =#)
170171
loadexpr = Expr(:call, lv(:_vload), sptr(op), inds)
171172
add_memory_mask!(loadexpr, op, td, mask, ls, 0)
172173
push!(loadexpr.args, falseexpr, rs)
@@ -209,7 +210,7 @@ function lower_load_for_optranslation!(
209210
# abs of steps are equal
210211
equal_steps = (step₁ == step₂) ⊻ (posindicator ≠ 0x03)
211212
_td = UnrollArgs(u₁loop, u₂loop, vloop, u₁, u₂max, Core.ifelse(equal_steps, 0, u₂max - 1))
212-
gespinds = mem_offset(op, _td, inds_by_ptroff, false, ls)
213+
gespinds = mem_offset(op, _td, inds_by_ptroff, false, ls, false)
213214
ptr = vptr(op)
214215
gptr = Symbol(ptr, "##GESPED##")
215216
for i ∈ eachindex(gespinds.args)
@@ -457,7 +458,7 @@ function lower_load_collection!(
457458
# construct dummy unrolled loop
458459
offset_dummy_loop = Loop(first(opindices), MaybeKnown(1), MaybeKnown(1024), MaybeKnown(1), Symbol(""), Symbol(""))
459460
unrollcurl₂ = unrolled_curly(op, nouter, offset_dummy_loop, vloop, mask, 1) # interleave always 1 here
460-
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, 0, ls)
461+
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, 0, ls, false)
461462
falseexpr = Expr(:call, lv(:False)); rs = staticexpr(reg_size(ls));
462463

463464
opu₁, opu₂ = isunrolled_sym(op, u₁loopsym, u₂loopsym, vloopsym, ls)
@@ -499,7 +500,7 @@ function lower_load_collection!(
499500
for u ∈ 0:u₁-1
500501
collectionname_u = Symbol(collectionname, :_, u)
501502
if u ≠ 0
502-
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, u, ls)
503+
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, u, ls, false)
503504
uinds = Expr(:call, unrollcurl₂, inds)
504505
loadexpr = copy(loadexpr)
505506
loadexpr.args[3] = Expr(:call, unrollcurl₂, inds)

src/codegen/lower_memory_common.jl

Lines changed: 15 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ unrolled loads are calculated as offsets with respect to an initial gesp. This h
139139
Therefore, unrolled === true results in inds being ignored.
140140
_mm means to insert `mm`s.
141141
"""
142-
function mem_offset(op::Operation, td::UnrollArgs, inds_calc_by_ptr_offset::Vector{Bool}, _mm::Bool, ls::LoopSet)
142+
function mem_offset(op::Operation, td::UnrollArgs, inds_calc_by_ptr_offset::Vector{Bool}, _mm::Bool, ls::LoopSet, preserve_vecunroll::Bool)
143143
# @assert accesses_memory(op) "Computing memory offset only makes sense for operations that access memory."
144144
ret = Expr(:tuple)
145145
indices = getindicesonly(op)
@@ -163,19 +163,17 @@ function mem_offset(op::Operation, td::UnrollArgs, inds_calc_by_ptr_offset::Vect
163163
# addoffset!(ret, newname, stride, offset, _mmi)
164164
_mmi = indvectorized && parent !== op && (!isvectorized(parent))
165165
@assert !_mmi "Please file an issue with an example of how you got this."
166-
if isu₁unrolled(parent) & (td.u₁ > 1)
166+
if (isu₁unrolled(parent) & (td.u₁ > 1)) & (!preserve_vecunroll)
167167
gf = GlobalRef(Core,:getfield)
168168
firstnew = Expr(:call, gf, Expr(:call, gf, newname, 1), 1, false)
169169
if isvectorized(parent) & (!_mm)
170170
firstnew = Expr(:call, lv(:unmm), firstnew)
171171
end
172172
addoffset!(ret, 0, firstnew, offset, false)
173+
elseif isvectorized(parent) & (!_mm)
174+
addoffset!(ret, 0, Expr(:call, lv(:unmm), newname), offset, false)
173175
else
174-
if isvectorized(parent) & (!_mm)
175-
addoffset!(ret, 0, Expr(:call, lv(:unmm), newname), offset, false)
176-
else
177-
addoffset!(ret, 0, newname, offset, false)
178-
end
176+
addoffset!(ret, 0, newname, offset, false)
179177
end
180178
end
181179
end
@@ -275,18 +273,18 @@ function unrolled_curly(op::Operation, u₁::Int, u₁loop::Loop, vloop::Loop, m
275273
end
276274
end
277275
function unrolledindex(op::Operation, td::UnrollArgs, mask::Bool, inds_calc_by_ptr_offset::Vector{Bool}, ls::LoopSet)
278-
@unpack u₁, u₁loopsym, u₁loop, vloop = td
279-
isone(u₁) && return mem_offset_u(op, td, inds_calc_by_ptr_offset, true, 0, ls)
280-
any(==(u₁loopsym), getindicesonly(op)) || return mem_offset_u(op, td, inds_calc_by_ptr_offset, true, 0, ls)
276+
@unpack u₁, u₁loopsym, u₁loop, vloop = td
277+
isone(u₁) && return mem_offset_u(op, td, inds_calc_by_ptr_offset, true, 0, ls, false)
278+
any(==(u₁loopsym), getindicesonly(op)) || return mem_offset_u(op, td, inds_calc_by_ptr_offset, true, 0, ls, true)
281279

282-
unrollcurl = unrolled_curly(op, u₁, u₁loop, vloop, mask)
283-
ind = mem_offset_u(op, td, inds_calc_by_ptr_offset, false, 0, ls)
284-
Expr(:call, unrollcurl, ind)
280+
unrollcurl = unrolled_curly(op, u₁, u₁loop, vloop, mask)
281+
ind = mem_offset_u(op, td, inds_calc_by_ptr_offset, false, 0, ls, false)
282+
Expr(:call, unrollcurl, ind)
285283
end
286284

287285
function mem_offset_u(
288-
op::Operation, td::UnrollArgs, inds_calc_by_ptr_offset::Vector{Bool}, _mm::Bool, incr₁::Int, ls::LoopSet
289-
)
286+
op::Operation, td::UnrollArgs, inds_calc_by_ptr_offset::Vector{Bool}, _mm::Bool, incr₁::Int, ls::LoopSet, preserve_vecunroll::Bool
287+
)
290288
@assert accesses_memory(op) "Computing memory offset only makes sense for operations that access memory."
291289
@unpack u₁loopsym, u₂loopsym, vloopsym, u₁step, u₂step, vstep, suffix = td
292290

@@ -299,7 +297,7 @@ function mem_offset_u(
299297
# allbasezero = all(inds_calc_by_ptr_offset) && all(iszero, offsets)
300298
loopedindex = op.ref.loopedindex
301299
if iszero(incr₁) & iszero(incr₂)
302-
return mem_offset(op, td, inds_calc_by_ptr_offset, _mm, ls)
300+
return mem_offset(op, td, inds_calc_by_ptr_offset, _mm, ls, preserve_vecunroll)
303301
# append_inds!(ret, indices, loopedindex)
304302
else
305303
for (n,ind) ∈ enumerate(indices)
@@ -334,7 +332,7 @@ function mem_offset_u(
334332
newname_unmm = mulexpr(newname_unmm,stride)
335333
end
336334
addoffset!(ret, 0, newname_unmm, offset, false)
337-
elseif isu₁unrolled(parent) & (td.u₁ > 1)
335+
elseif (isu₁unrolled(parent) & (td.u₁ > 1)) && !preserve_vecunroll
338336
gf = GlobalRef(Core,:getfield)
339337
firstnew = Expr(:call, gf, Expr(:call, gf, newname, 1), incr₁+1, false)
340338
if stride ≠ 1
@@ -420,20 +418,6 @@ function add_memory_mask!(memopexpr::Expr, op::Operation, td::UnrollArgs, mask::
420418
nothing
421419
end
422420

423-
# varassignname(var::Symbol, u::Int, isunrolled::Bool) = isunrolled ? Symbol(var, u) : var
424-
# # name_memoffset only gets called when vectorized
425-
# function name_memoffset(var::Symbol, op::Operation, td::UnrollArgs, u₁unrolled::Bool, inds_calc_by_ptr_offset::Vector{Bool}, ls::LoopSet)
426-
# @unpack u₁, u₁loopsym, u₂loopsym, suffix = td
427-
# if (suffix == -1) && u₁ < 0 # u₁ == -1 sentinel value meaning not unrolled
428-
# name = var
429-
# mo = mem_offset(op, td, inds_calc_by_ptr_offset, true, 0, ls)
430-
# else
431-
# name = u₁unrolled ? Symbol(var, u₁) : var
432-
# mo = mem_offset_u(op, td, inds_calc_by_ptr_offset, true, 0, ls)
433-
# end
434-
# name, mo
435-
# end
436-
437421
function condvarname_and_unroll(cond::Operation, u₁loop::Symbol, u₂loop::Symbol, vloop::Symbol, suffix::Int, opu₂::Bool, ls::LoopSet)
438422
condvar, condu₁, condu₂ = variable_name_and_unrolled(cond, u₁loop, u₂loop, vloop, Core.ifelse(opu₂, suffix, -1), ls)
439423
condvar, condu₁

src/codegen/lower_store.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ function lower_store_collection!(
9292

9393
offset_dummy_loop = Loop(first(getindices(op)), MaybeKnown(1), MaybeKnown(1024), MaybeKnown(1), Symbol(""), Symbol(""))
9494
unrollcurl₂ = unrolled_curly(op, nouter, offset_dummy_loop, vloop, mask, 1)
95-
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, 0, ls)
95+
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, 0, ls, false)
9696
falseexpr = Expr(:call, lv(:False));
9797
aliasexpr = falseexpr;
9898
# trueexpr = Expr(:call, lv(:True));
@@ -150,7 +150,7 @@ function lower_store_collection!(
150150
end
151151
storeexpr_tmp.args[3] = Expr(:call, lv(:VecUnroll), vut)
152152
if u ≠ 0
153-
storeexpr_tmp.args[4] = Expr(:call, unrollcurl₂, mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, u, ls))
153+
storeexpr_tmp.args[4] = Expr(:call, unrollcurl₂, mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, u, ls, false))
154154
end
155155
push!(q.args, storeexpr_tmp)
156156
end
@@ -216,7 +216,7 @@ function lower_store!(
216216
data_u₁ && push!(q.args, Expr(:(=), mvard, Expr(:call, lv(:data), mvar)))
217217
sptrsym = sptr!(q, op)
218218
for u ∈ 1:u₁
219-
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, true, u-1, ls)
219+
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, true, u-1, ls, false)
220220
# @show isu₁unrolled(opp), opp
221221
storeexpr = if data_u₁
222222
if reductfunc === Symbol("")
@@ -235,7 +235,7 @@ function lower_store!(
235235
push!(q.args, storeexpr)
236236
end
237237
else
238-
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, true, 0, ls)
238+
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, true, 0, ls, false)
239239
storeexpr = if reductfunc === Symbol("")
240240
Expr(:call, lv(:_vstore!), sptr(op), mvar, inds)
241241
else
@@ -300,7 +300,7 @@ function lower_tiled_store!(blockq::Expr, op::Operation, ls::LoopSet, ua::Unroll
300300
push!(tup.args, Symbol(variable_name(opp, ifelse(isu₂, t, -1)), '_', u))
301301
end
302302
vut = Expr(:call, lv(:VecUnroll), tup) # `VecUnroll` of `VecUnroll`s
303-
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, 0, ls)
303+
inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, false, 0, ls, false)
304304
unrollcurl₂ = unrolled_curly(op, u₂, u₂loop, vloop, mask)
305305
falseexpr = Expr(:call, lv(:False));
306306
aliasexpr = falseexpr;

src/parse/add_constants.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ function add_constant_vload!(ls::LoopSet, op::Operation, mpref::ArrayReferenceMe
109109
for ind ∈ getindicesonly(op)
110110
ensure_constant_lowered!(ls, mpref, ind)
111111
end
112-
push!(vloadcall.args, mem_offset(op, UnrollArgs(dummyloop, dummyloop, dummyloop, 0, 0, 0), fill(false,nindices), true, ls))
112+
push!(vloadcall.args, mem_offset(op, UnrollArgs(dummyloop, dummyloop, dummyloop, 0, 0, 0), fill(false,nindices), true, ls, false))
113113
end
114114
push!(vloadcall.args, Expr(:call, lv(:False)), staticexpr(reg_size(ls)))
115115
pushpreamble!(ls, Expr(:(=), temp, vloadcall))

0 commit comments

Comments
 (0)