@@ -81,7 +81,7 @@ function addoffset!(ret::Expr, indvectorized::Bool, vloopstride, indexstride, in
81
81
end
82
82
end
83
83
84
- function addoffset ! (ret:: Expr , indvectorized:: Bool , unrolledsteps, vloopstride, indexstride, index, offset, calcbypointeroffset:: Bool ) # 8 -> 7 args
84
+ function addvectoroffset ! (ret:: Expr , indvectorized:: Bool , unrolledsteps, vloopstride, indexstride, index, offset, calcbypointeroffset:: Bool ) # 8 -> 7 args
85
85
# if _iszero(unrolledsteps) # if no steps, pass through; should be unreachable
86
86
# addoffset!(ret, indvectorized, vloopstride, indexstride, index, offset, calcbypointeroffset)
87
87
# else
@@ -98,22 +98,22 @@ function addoffset!(ret::Expr, indvectorized::Bool, unrolledsteps, vloopstride,
98
98
end
99
99
end
100
100
# unrolledloopstride is a stride multiple on `unrolledsteps`
#
# Diff view of one method: lines prefixed `-` are the pre-change 9-argument
# `addoffset!`; lines prefixed `+` are the post-change 10-argument
# `addvectoroffset!`. The post-change version takes the flag forwarded to the
# callees as `mm` and receives `indvectorized` as a separate trailing argument
# that is used only to pick the branch below.
#
# Dispatch performed by the body:
#   * `unrolledsteps == 0`  -> forward unchanged to the 7-argument `addoffset!`.
#   * `indvectorized`       -> scale `unrolledsteps` by `indexstride`, combine it
#                              with `unrolledloopstride`, and forward to the
#                              8-argument method.
#   * otherwise             -> add `unrolledsteps` to `offset` and forward to the
#                              7-argument `addoffset!`.
101
- function addoffset ! (
102
- ret:: Expr , indvectorized :: Bool , unrolledsteps:: Int , unrolledloopstride, vloopstride, indexstride:: Integer , index, offset:: Integer , calcbypointeroffset:: Bool
103
- ) # 9 -> (7 or 8) args
101
+ function addvectoroffset ! (
102
+ ret:: Expr , mm :: Bool , unrolledsteps:: Int , unrolledloopstride, vloopstride, indexstride:: Integer , index, offset:: Integer , calcbypointeroffset:: Bool , indvectorized :: Bool
103
+ ) # 10 -> (7 or 8) args
104
104
if unrolledsteps == 0 # neither unrolledloopstride nor indexstride can be 0
105
- addoffset! (ret, indvectorized , vloopstride, indexstride, index, offset, calcbypointeroffset) # 7 arg
105
+ addoffset! (ret, mm , vloopstride, indexstride, index, offset, calcbypointeroffset) # 7 arg
106
106
elseif indvectorized
107
107
# From here on `unrolledsteps` holds the step count already multiplied by `indexstride`;
# the `== 1` test below is made on that scaled value.
unrolledsteps *= indexstride
108
108
# Stride accepted by `isknown`: multiply its `gethint` value into the step count directly.
if isknown (unrolledloopstride)
109
- addoffset ! (ret, indvectorized , gethint (unrolledloopstride)* unrolledsteps, vloopstride, indexstride, index, offset, calcbypointeroffset) # 8 arg
109
+ addvectoroffset ! (ret, mm , gethint (unrolledloopstride)* unrolledsteps, vloopstride, indexstride, index, offset, calcbypointeroffset) # 8 arg
110
110
# Scaled step count of one: pass the stride itself, no multiplication needed.
elseif unrolledsteps == 1
111
- addoffset ! (ret, indvectorized , unrolledloopstride, vloopstride, indexstride, index, offset, calcbypointeroffset) # 8 arg
111
+ addvectoroffset ! (ret, mm , unrolledloopstride, vloopstride, indexstride, index, offset, calcbypointeroffset) # 8 arg
112
112
# Remaining case: hand the product to `mulexpr` to build.
else
113
- addoffset ! (ret, indvectorized , mulexpr (unrolledloopstride,unrolledsteps), vloopstride, indexstride, index, offset, calcbypointeroffset) # 8 arg
113
+ addvectoroffset ! (ret, mm , mulexpr (unrolledloopstride,unrolledsteps), vloopstride, indexstride, index, offset, calcbypointeroffset) # 8 arg
114
114
end
115
115
# Index is not the vectorized one: the unscaled `unrolledsteps` is added straight onto `offset`.
else
116
- addoffset! (ret, indvectorized , vloopstride, indexstride, index, offset + unrolledsteps, calcbypointeroffset) # 7 arg
116
+ addoffset! (ret, mm , vloopstride, indexstride, index, offset + unrolledsteps, calcbypointeroffset) # 7 arg
117
117
end
118
118
end
119
119
@@ -137,15 +137,28 @@ function mem_offset(op::Operation, td::UnrollArgs, inds_calc_by_ptr_offset::Vect
137
137
stride = strides[n] % Int
138
138
@unpack vstep = td
139
139
if loopedindex[n]
140
- addoffset! (ret, indvectorized, vstep, stride, ind, offset, inds_calc_by_ptr_offset[n] | (ind === CONSTANTZEROINDEX))
140
+ addoffset! (ret, indvectorized, vstep, stride, ind, offset, inds_calc_by_ptr_offset[n] | (ind === CONSTANTZEROINDEX)) # 7 arg
141
141
else
142
142
offset -= 1
143
143
newname, parent = symbolind (ind, op, td)
144
144
# _mmi = indvectorized && parent !== op && (!isvectorized(parent))
145
145
# addoffset!(ret, newname, stride, offset, _mmi)
146
146
_mmi = indvectorized && parent != = op && (! isvectorized (parent))
147
147
@assert ! _mmi " Please file an issue with an example of how you got this."
148
- addoffset! (ret, 0 , newname, offset, false )
148
+ if isu₁unrolled (parent) & (td. u₁ > 1 )
149
+ gf = GlobalRef (Core,:getfield )
150
+ firstnew = Expr (:call , gf, Expr (:call , gf, newname, 1 ), 1 , false )
151
+ if isvectorized (parent) & (! _mm)
152
+ firstnew = Expr (:call , lv (:unmm ), firstnew)
153
+ end
154
+ addoffset! (ret, 0 , firstnew, offset, false )
155
+ else
156
+ if isvectorized (parent) & (! _mm)
157
+ addoffset! (ret, 0 , Expr (:call , lv (:unmm ), newname), offset, false )
158
+ else
159
+ addoffset! (ret, 0 , newname, offset, false )
160
+ end
161
+ end
149
162
end
150
163
end
151
164
ret
@@ -249,20 +262,42 @@ function mem_offset_u(op::Operation, td::UnrollArgs, inds_calc_by_ptr_offset::Ve
249
262
ind_by_offset = inds_calc_by_ptr_offset[n] | (ind === CONSTANTZEROINDEX)
250
263
offset = convert (Int, offsets[n])
251
264
stride = convert (Int, strides[n])
252
- indvectorized = _mm & (ind === vloopsym)
265
+ indvectorized = ind === vloopsym
266
+ indvectorizedmm = _mm & indvectorized
253
267
if ind === u₁loopsym
254
- addoffset ! (ret, indvectorized , incr₁, u₁step, vstep, stride, ind, offset, ind_by_offset)
268
+ addvectoroffset ! (ret, indvectorizedmm , incr₁, u₁step, vstep, stride, ind, offset, ind_by_offset, indvectorized) # 10 arg
255
269
elseif ind === u₂loopsym
256
- addoffset! (ret, indvectorized, incr₂, u₂step, vstep, stride, ind, offset, ind_by_offset)
270
+ # if isstore(op)
271
+ # @show indvectorized, ind === vloopsym, u₂loopsym, incr₂
272
+ # end
273
+ addvectoroffset! (ret, indvectorizedmm, incr₂, u₂step, vstep, stride, ind, offset, ind_by_offset, indvectorized) # 10 arg
257
274
elseif loopedindex[n]
258
- addoffset! (ret, indvectorized , vstep, stride, ind, offset, ind_by_offset)
275
+ addoffset! (ret, indvectorizedmm , vstep, stride, ind, offset, ind_by_offset) # 7 arg
259
276
else
260
277
offset -= 1
261
278
newname, parent = symbolind (ind, op, td)
262
- _mmi = _mm && indvectorized && parent != = op && (! isvectorized (parent))
279
+ _mmi = indvectorizedmm && parent != = op && (! isvectorized (parent))
263
280
# addoffset!(ret, newname, 1, offset, _mmi)
264
281
@assert ! _mmi " Please file an issue with an example of how you got this."
265
- if stride == 1
282
+ if isvectorized (parent) & (! _mm)
283
+ if isu₁unrolled (parent) & (td. u₁ > 1 )
284
+ gf = GlobalRef (Core,:getfield )
285
+ newname_unmm = Expr (:call , lv (:unmm ), Expr (:call , gf, Expr (:call , gf, newname, 1 ), incr₁+ 1 , false ))
286
+ else
287
+ newname_unmm = Expr (:call , lv (:unmm ), newname)
288
+ end
289
+ if stride ≠ 1
290
+ newname_unmm = mulexpr (newname_unmm,stride)
291
+ end
292
+ addoffset! (ret, 0 , newname_unmm, offset, false )
293
+ elseif isu₁unrolled (parent) & (td. u₁ > 1 )
294
+ gf = GlobalRef (Core,:getfield )
295
+ firstnew = Expr (:call , gf, Expr (:call , gf, newname, 1 ), incr₁+ 1 , false )
296
+ if stride ≠ 1
297
+ firstnew = mulexpr (firstnew,stride)
298
+ end
299
+ addoffset! (ret, 0 , firstnew, offset, false )
300
+ elseif stride == 1
266
301
addoffset! (ret, 0 , newname, offset, false )
267
302
else
268
303
addoffset! (ret, 0 , mulexpr (newname,stride), offset, false )
276
311
# Untyped two-argument method: combine `a` and `b` with the `&` operator.
@inline function and_last(a, b)
    return a & b
end
277
312
@generated function and_last (v:: VecUnroll{N} , m) where {N}
278
313
q = Expr (:block , Expr (:meta ,:inline ), :(vd = data (v)))
279
- t = Expr (:tuple )
314
+ t = Expr (:call , lv ( :promote ) )
280
315
for n ∈ 1 : N
281
316
push! (t. args, :(getfield (vd, $ n, false )))
282
317
end
0 commit comments