Skip to content

Commit bccb69f

Browse files
committed
Try to fix mac test failure
1 parent 7786d05 commit bccb69f

File tree

3 files changed

+56
-44
lines changed

3 files changed

+56
-44
lines changed

src/codegen/lower_load.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ function lower_load_no_optranslation!(
150150
if (all(op.ref.loopedindex) && !rejectcurly(op)) && vectorization_profitable(op)
151151
inds = unrolledindex(op, td, mask, inds_calc_by_ptr_offset, ls)
152152
loadexpr = Expr(:call, lv(:_vload), sptr(op), inds)
153-
add_memory_mask!(loadexpr, op, td, mask, ls)
153+
add_memory_mask!(loadexpr, op, td, mask, ls, 0)
154154
push!(loadexpr.args, falseexpr, rs) # unaligned load
155155
push!(q.args, Expr(:(=), mvar, loadexpr))
156156
elseif (u₁ > 1) & opu₁
@@ -160,7 +160,7 @@ function lower_load_no_optranslation!(
160160
inds = mem_offset_u(op, td, inds_calc_by_ptr_offset, true, u-1, ls)
161161
loadexpr = Expr(:call, lv(:_vload), sptrsym, inds)
162162
domask = mask && (isvectorized(op) & ((u == u₁) | (vloopsym !== u₁loopsym)))
163-
add_memory_mask!(loadexpr, op, td, domask, ls)
163+
add_memory_mask!(loadexpr, op, td, domask, ls, u)
164164
push!(loadexpr.args, falseexpr, rs)
165165
push!(t.args, loadexpr)
166166
# push!(q.args, Expr(:(=), mvar, loadexpr))
@@ -169,7 +169,7 @@ function lower_load_no_optranslation!(
169169
else
170170
inds = mem_offset_u(op, td, inds_calc_by_ptr_offset, true, 0, ls)
171171
loadexpr = Expr(:call, lv(:_vload), sptr(op), inds)
172-
add_memory_mask!(loadexpr, op, td, mask, ls)
172+
add_memory_mask!(loadexpr, op, td, mask, ls, 0)
173173
push!(loadexpr.args, falseexpr, rs)
174174
push!(q.args, Expr(:(=), mvar, loadexpr))
175175
end
@@ -465,7 +465,7 @@ function lower_load_collection!(
465465
uinds = Expr(:call, unrollcurl₂, inds)
466466
sptrsym = sptr!(q, op)
467467
loadexpr = Expr(:call, lv(:_vload), sptrsym, uinds)
468-
# not using `add_memory_mask!(storeexpr, op, ua, mask, ls)` because we checked `isconditionalmemop` earlier in `lower_load_collection!`
468+
# not using `add_memory_mask!(storeexpr, op, ua, mask, ls, 0)` because we checked `isconditionalmemop` earlier in `lower_load_collection!`
469469
u₁vectorized = u₁loopsym === vloopsym
470470
if (mask && isvectorized(op))
471471
if !(manualunrollu₁ & u₁vectorized)

src/codegen/lower_memory_common.jl

Lines changed: 49 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -367,45 +367,57 @@ end
367367

368368

369369
isconditionalmemop(op::Operation) = (instruction(op).instr === :conditionalload) || (instruction(op).instr === :conditionalstore!)
370-
function add_memory_mask!(memopexpr::Expr, op::Operation, td::UnrollArgs, mask::Bool, ls::LoopSet)
371-
@unpack u₁, u₁loopsym, u₂loopsym, vloopsym, u₂max, suffix = td
372-
if isconditionalmemop(op)
373-
condop = last(parents(op))
374-
opu₂ = (suffix ≠ -1) && isu₂unrolled(op)
375-
condvar, condu₁unrolled = condvarname_and_unroll(condop, u₁loopsym, u₂loopsym, vloopsym, suffix, opu₂, ls)
376-
# if it isn't unrolled, then `m`
377-
u = condu₁unrolled ? u₁ : 1
378-
# u = isu₁unrolled(condop) ? u₁ : 1
379-
condvar = Symbol(condvar, '_', u)
380-
# If we need to apply `MASKSYMBOL` and the condvar
381-
# 2 condvar possibilities:
382-
# `VecUnroll` applied everywhere
383-
# single mask "broadcast"
384-
# 2 mask possibilities
385-
# u₁loopsym ≠ vloopsym, and we mask all
386-
# u₁loopsym == vloopsym, and we mask last
387-
# broadcast both, so can do so implicitly
388-
# this is true whether or not `condbroadcast`
389-
if !mask || (!isvectorized(op))
390-
push!(memopexpr.args, condvar)
391-
elseif (u₁loopsym ≢ vloopsym) | (u₁ == 1) # mask all equivalenetly
392-
push!(memopexpr.args, Expr(:call, lv(:&), condvar, MASKSYMBOL))
393-
# if the condition `(u₁loopsym ≢ vloopsym) | (u₁ == 1)` failed, we need to apply `MASKSYMBOL` only to last unroll.
394-
elseif !condu₁unrolled && isu₁unrolled(op) # condbroadcast
395-
# explicitly broadcast `condvar`, and apply `MASKSYMBOL` to end
396-
t = Expr(:call, lv(:promote))
397-
for um ∈ 1:u₁-1
398-
push!(t.args, condvar)
399-
end
400-
push!(t.args, Expr(:call, lv(:&), condvar, MASKSYMBOL))
401-
push!(memopexpr.args, Expr(:call, lv(:VecUnroll), t))
402-
else# !condbroadcast && !vecunrolled
403-
push!(memopexpr.args, Expr(:call, lv(:and_last), condvar, MASKSYMBOL))
370+
function add_memory_mask!(memopexpr::Expr, op::Operation, td::UnrollArgs, mask::Bool, ls::LoopSet, u₁ᵢ::Int)
371+
@unpack u₁, u₁loopsym, u₂loopsym, vloopsym, u₂max, suffix = td
372+
if isconditionalmemop(op)
373+
condop = last(parents(op))
374+
opu₂ = (suffix ≠ -1) && isu₂unrolled(op)
375+
condvar, condu₁unrolled = condvarname_and_unroll(condop, u₁loopsym, u₂loopsym, vloopsym, suffix, opu₂, ls)
376+
# if it isn't unrolled, then `m`
377+
u = condu₁unrolled ? u₁ : 1
378+
# u = isu₁unrolled(condop) ? u₁ : 1
379+
condvar = Symbol(condvar, '_', u)
380+
# If we need to apply `MASKSYMBOL` and the condvar
381+
# 2 condvar possibilities:
382+
# `VecUnroll` applied everywhere
383+
# single mask "broadcast"
384+
# 2 mask possibilities
385+
# u₁loopsym ≠ vloopsym, and we mask all
386+
# u₁loopsym == vloopsym, and we mask last
387+
# broadcast both, so can do so implicitly
388+
# this is true whether or not `condbroadcast`
389+
if !mask || (!isvectorized(op))
390+
if u₁ᵢ == 0 | (u == 1)
391+
push!(memopexpr.args, condvar)
392+
else
393+
push!(memopexpr.args, :($getfield($getfield($condvar, 1), $(u₁ᵢ), false)))
394+
end
395+
elseif (u₁loopsym ≢ vloopsym) | (u₁ == 1) # mask all equivalenetly
396+
push!(memopexpr.args, Expr(:call, lv(:&), condvar, MASKSYMBOL))
397+
# if the condition `(u₁loopsym ≢ vloopsym) | (u₁ == 1)` failed, we need to apply `MASKSYMBOL` only to last unroll.
398+
elseif ((!condu₁unrolled)) && isu₁unrolled(op) # condbroadcast
399+
if u₁ᵢ == 0
400+
# explicitly broadcast `condvar`, and apply `MASKSYMBOL` to end
401+
t = Expr(:call, lv(:promote))
402+
for um ∈ 1:u₁-1
403+
push!(t.args, condvar)
404404
end
405-
elseif mask && isvectorized(op)
406-
push!(memopexpr.args, MASKSYMBOL)
405+
push!(t.args, Expr(:call, lv(:&), condvar, MASKSYMBOL))
406+
push!(memopexpr.args, Expr(:call, lv(:VecUnroll), t))
407+
else
408+
push!(memopexpr.args, condvar)
409+
end
410+
elseif u₁i == 0# !condbroadcast && !vecunrolled
411+
push!(memopexpr.args, Expr(:call, lv(:and_last), condvar, MASKSYMBOL))
412+
elseif u₁i == u₁ # mask
413+
push!(memopexpr.args, Expr(:call, lv(:&), :($getfield($getfield(condvar,1),$u₁i,false)), MASKSYMBOL))
414+
else
415+
push!(memopexpr.args, Expr(:call, lv(:&), :($getfield($getfield(condvar,1),$u₁i,false))))
407416
end
408-
nothing
417+
elseif mask && isvectorized(op)
418+
push!(memopexpr.args, MASKSYMBOL)
419+
end
420+
nothing
409421
end
410422

411423
# varassignname(var::Symbol, u::Int, isunrolled::Bool) = isunrolled ? Symbol(var, u) : var

src/codegen/lower_store.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ function lower_store!(
192192
else
193193
Expr(:call, lv(:_vstore!), lv(reductfunc), sptr(op), mvar, inds)
194194
end
195-
add_memory_mask!(storeexpr, op, ua, mask, ls)
195+
add_memory_mask!(storeexpr, op, ua, mask, ls, 0)
196196
push!(storeexpr.args, falseexpr, aliasexpr, falseexpr, rs)
197197
push!(q.args, storeexpr)
198198
else
@@ -223,7 +223,7 @@ function lower_store!(
223223
Expr(:call, lv(:_vstore!), lv(reductfunc), sptrsym, mvar, inds)
224224
end
225225
domask = mask && (isvectorized(op) & ((u == u₁) | (vloopsym !== u₁loopsym)))
226-
add_memory_mask!(storeexpr, op, ua, domask, ls)# & ((u == u₁) | isvectorized(op)))
226+
add_memory_mask!(storeexpr, op, ua, domask, ls, u)# & ((u == u₁) | isvectorized(op)))
227227
push!(storeexpr.args, falseexpr, aliasexpr, falseexpr, rs)
228228
push!(q.args, storeexpr)
229229
end
@@ -234,7 +234,7 @@ function lower_store!(
234234
else
235235
Expr(:call, lv(:_vstore!), lv(reductfunc), sptr(op), mvar, inds)
236236
end
237-
add_memory_mask!(storeexpr, op, ua, mask, ls)
237+
add_memory_mask!(storeexpr, op, ua, mask, ls, 0)
238238
push!(storeexpr.args, falseexpr, aliasexpr, falseexpr, rs)
239239
push!(q.args, storeexpr)
240240
end

0 commit comments

Comments
 (0)