@@ -139,7 +139,7 @@ unrolled loads are calculated as offsets with respect to an initial gesp. This h
139
139
Therefore, unrolled === true results in inds being ignored.
140
140
_mm means to insert `mm`s.
141
141
"""
142
- function mem_offset (op:: Operation , td:: UnrollArgs , inds_calc_by_ptr_offset:: Vector{Bool} , _mm:: Bool , ls:: LoopSet )
142
+ function mem_offset (op:: Operation , td:: UnrollArgs , inds_calc_by_ptr_offset:: Vector{Bool} , _mm:: Bool , ls:: LoopSet , preserve_vecunroll :: Bool )
143
143
# @assert accesses_memory(op) "Computing memory offset only makes sense for operations that access memory."
144
144
ret = Expr (:tuple )
145
145
indices = getindicesonly (op)
@@ -163,19 +163,17 @@ function mem_offset(op::Operation, td::UnrollArgs, inds_calc_by_ptr_offset::Vect
163
163
# addoffset!(ret, newname, stride, offset, _mmi)
164
164
_mmi = indvectorized && parent != = op && (! isvectorized (parent))
165
165
@assert ! _mmi " Please file an issue with an example of how you got this."
166
- if isu₁unrolled (parent) & (td. u₁ > 1 )
166
+ if ( isu₁unrolled (parent) & (td. u₁ > 1 )) & ( ! preserve_vecunroll )
167
167
gf = GlobalRef (Core,:getfield )
168
168
firstnew = Expr (:call , gf, Expr (:call , gf, newname, 1 ), 1 , false )
169
169
if isvectorized (parent) & (! _mm)
170
170
firstnew = Expr (:call , lv (:unmm ), firstnew)
171
171
end
172
172
addoffset! (ret, 0 , firstnew, offset, false )
173
+ elseif isvectorized (parent) & (! _mm)
174
+ addoffset! (ret, 0 , Expr (:call , lv (:unmm ), newname), offset, false )
173
175
else
174
- if isvectorized (parent) & (! _mm)
175
- addoffset! (ret, 0 , Expr (:call , lv (:unmm ), newname), offset, false )
176
- else
177
- addoffset! (ret, 0 , newname, offset, false )
178
- end
176
+ addoffset! (ret, 0 , newname, offset, false )
179
177
end
180
178
end
181
179
end
@@ -275,18 +273,18 @@ function unrolled_curly(op::Operation, u₁::Int, u₁loop::Loop, vloop::Loop, m
275
273
end
276
274
end
277
275
"""
    unrolledindex(op::Operation, td::UnrollArgs, mask::Bool,
                  inds_calc_by_ptr_offset::Vector{Bool}, ls::LoopSet)

Build the index expression used to address memory for `op` under `u₁`
unrolling.

Fast paths return a plain offset tuple from `mem_offset_u`:
- when `u₁ == 1` there is nothing to unroll (`preserve_vecunroll = false`);
- when none of `op`'s indices involve the `u₁` loop, the offsets are the
  same for every unrolled copy, and `preserve_vecunroll = true` keeps any
  `VecUnroll` index intact rather than extracting its first element.

Otherwise the offset tuple is wrapped in the `Unroll` curly produced by
`unrolled_curly`, yielding `Expr(:call, Unroll{...}, inds)`.
"""
function unrolledindex(op::Operation, td::UnrollArgs, mask::Bool, inds_calc_by_ptr_offset::Vector{Bool}, ls::LoopSet)
    @unpack u₁, u₁loopsym, u₁loop, vloop = td
    # No unrolling requested: a single offset tuple suffices.
    isone(u₁) && return mem_offset_u(op, td, inds_calc_by_ptr_offset, true, 0, ls, false)
    # `op` does not index by the u₁ loop, so all unrolled copies share one
    # address; preserve any VecUnroll-valued index as-is.
    any(==(u₁loopsym), getindicesonly(op)) || return mem_offset_u(op, td, inds_calc_by_ptr_offset, true, 0, ls, true)

    unrollcurl = unrolled_curly(op, u₁, u₁loop, vloop, mask)
    ind = mem_offset_u(op, td, inds_calc_by_ptr_offset, false, 0, ls, false)
    Expr(:call, unrollcurl, ind)
end
286
284
287
285
function mem_offset_u (
288
- op:: Operation , td:: UnrollArgs , inds_calc_by_ptr_offset:: Vector{Bool} , _mm:: Bool , incr₁:: Int , ls:: LoopSet
289
- )
286
+ op:: Operation , td:: UnrollArgs , inds_calc_by_ptr_offset:: Vector{Bool} , _mm:: Bool , incr₁:: Int , ls:: LoopSet , preserve_vecunroll :: Bool
287
+ )
290
288
@assert accesses_memory (op) " Computing memory offset only makes sense for operations that access memory."
291
289
@unpack u₁loopsym, u₂loopsym, vloopsym, u₁step, u₂step, vstep, suffix = td
292
290
@@ -299,7 +297,7 @@ function mem_offset_u(
299
297
# allbasezero = all(inds_calc_by_ptr_offset) && all(iszero, offsets)
300
298
loopedindex = op. ref. loopedindex
301
299
if iszero (incr₁) & iszero (incr₂)
302
- return mem_offset (op, td, inds_calc_by_ptr_offset, _mm, ls)
300
+ return mem_offset (op, td, inds_calc_by_ptr_offset, _mm, ls, preserve_vecunroll )
303
301
# append_inds!(ret, indices, loopedindex)
304
302
else
305
303
for (n,ind) ∈ enumerate (indices)
@@ -334,7 +332,7 @@ function mem_offset_u(
334
332
newname_unmm = mulexpr (newname_unmm,stride)
335
333
end
336
334
addoffset! (ret, 0 , newname_unmm, offset, false )
337
- elseif isu₁unrolled (parent) & (td. u₁ > 1 )
335
+ elseif ( isu₁unrolled (parent) & (td. u₁ > 1 )) && ! preserve_vecunroll
338
336
gf = GlobalRef (Core,:getfield )
339
337
firstnew = Expr (:call , gf, Expr (:call , gf, newname, 1 ), incr₁+ 1 , false )
340
338
if stride ≠ 1
@@ -420,20 +418,6 @@ function add_memory_mask!(memopexpr::Expr, op::Operation, td::UnrollArgs, mask::
420
418
nothing
421
419
end
422
420
423
- # varassignname(var::Symbol, u::Int, isunrolled::Bool) = isunrolled ? Symbol(var, u) : var
424
- # # name_memoffset only gets called when vectorized
425
- # function name_memoffset(var::Symbol, op::Operation, td::UnrollArgs, u₁unrolled::Bool, inds_calc_by_ptr_offset::Vector{Bool}, ls::LoopSet)
426
- # @unpack u₁, u₁loopsym, u₂loopsym, suffix = td
427
- # if (suffix == -1) && u₁ < 0 # u₁ == -1 sentinel value meaning not unrolled
428
- # name = var
429
- # mo = mem_offset(op, td, inds_calc_by_ptr_offset, true, 0, ls)
430
- # else
431
- # name = u₁unrolled ? Symbol(var, u₁) : var
432
- # mo = mem_offset_u(op, td, inds_calc_by_ptr_offset, true, 0, ls)
433
- # end
434
- # name, mo
435
- # end
436
-
437
421
"""
    condvarname_and_unroll(cond::Operation, u₁loop::Symbol, u₂loop::Symbol,
                           vloop::Symbol, suffix::Int, opu₂::Bool, ls::LoopSet)

Return `(condvar, condu₁)`: the generated variable name for the condition
operation `cond` and whether that name is `u₁`-unrolled.

The `u₂` suffix is forwarded only when `opu₂` is `true`; otherwise the
sentinel `-1` is passed, meaning "not u₂-unrolled".
"""
function condvarname_and_unroll(cond::Operation, u₁loop::Symbol, u₂loop::Symbol, vloop::Symbol, suffix::Int, opu₂::Bool, ls::LoopSet)
    # Core.ifelse evaluates both arms but avoids a branch; suffix is cheap.
    condvar, condu₁, condu₂ = variable_name_and_unrolled(cond, u₁loop, u₂loop, vloop, Core.ifelse(opu₂, suffix, -1), ls)
    # condu₂ is computed by the helper but not needed by callers here.
    condvar, condu₁
end