|
# Return `true` when `op` has a reduced dependency that is not also one of its
# loop dependencies, i.e. the operation is reduced across some loop it does not
# itself iterate over.
function opisreduced(op::Operation)
    return any(rdep -> rdep ∉ loopdependencies(op), reduceddependencies(op))
end
1 | 7 | function storeinstr_preprend(op::Operation, vloopsym::Symbol)
|
2 | 8 | # defaultstoreop = :vstore!
|
3 | 9 | # defaultstoreop = :vnoaliasstore!
|
4 | 10 | isvectorized(op) && return Symbol("")
|
5 | 11 | vloopsym ∉ reduceddependencies(op) && return Symbol("")
|
6 | 12 | # vectorized is not a loopdep, but is a reduced dep
|
7 |
| - opp = first(parents(op)) |
| 13 | + opp::Operation = first(parents(op)) |
8 | 14 | # while vectorized ∉ loopdependencies(opp)
|
9 |
| - while ((!isvectorized(opp)) || (any(rdep -> rdep ∉ loopdependencies(opp), reduceddependencies(opp)))) |
| 15 | + while ((!isvectorized(opp)) || opisreduced(opp)) |
10 | 16 | oppold = opp
|
11 | 17 | for oppp ∈ parents(opp)
|
12 | 18 | if vloopsym ∈ reduceddependencies(oppp)
|
@@ -145,88 +151,92 @@ function lower_store_collection!(
|
145 | 151 | end
|
# Build an expression extracting field `n` from the value bound to symbol `s`,
# via `Core.getfield` with bounds checking disabled (the trailing `false`).
function gf(s::Symbol, n::Int)
    getfield_ref = GlobalRef(Core, :getfield)
    return Expr(:call, getfield_ref, s, n, false)
end
"""
    lower_store!(q, ls, op, ua, mask,
                 reductfunc = storeinstr_preprend(op, ua.vloop.itersymbol),
                 inds_calc_by_ptr_offset = indices_calculated_by_pointer_offsets(ls, op.ref))

Push the expression(s) implementing the store operation `op` onto `q.args`.
`reductfunc === Symbol("")` means a plain store; otherwise the named reduction
is applied while storing. `mask` requests masked stores for the vectorized loop.
Returns `nothing`.
"""
function lower_store!(
    q::Expr, ls::LoopSet, op::Operation, ua::UnrollArgs, mask::Bool,
    reductfunc::Symbol = storeinstr_preprend(op, ua.vloop.itersymbol),
    inds_calc_by_ptr_offset = indices_calculated_by_pointer_offsets(ls, op.ref)
)
    @unpack u₁, u₁loopsym, u₂loopsym, vloopsym, vloop, u₂max, suffix = ua
    omop = offsetloadcollection(ls)
    batchid, opind = omop.batchedcollectionmap[identifier(op)]
    # When `op` is part of a batched collection (batchid ≠ 0), vectorized, and
    # not rejected for interleaving, the whole collection is lowered at once
    # when its first member (opind == 1) is reached; other members emit nothing.
    if ((batchid ≠ 0) && isvectorized(op)) && (!rejectinterleave(op))
        (opind == 1) && lower_store_collection!(q, ls, op, ua, mask, inds_calc_by_ptr_offset)
        return
    end
    falseexpr = Expr(:call, lv(:False))
    aliasexpr = falseexpr
    rs = staticexpr(reg_size(ls))
    opp = first(parents(op))
    # If the parent merely forwards the stored value (`identity`, or the
    # reduction itself) and has exactly one parent of its own, look through it —
    # but only when u₂-unrolling status matches, so `variable_name` below still
    # resolves to the correct symbol. `Base.iterate` is used directly to test
    # "exactly one parent" without a second traversal.
    if (opp.instruction.instr === reductfunc) || (opp.instruction.instr === :identity)
        parents_opp = parents(opp)
        opppstate = Base.iterate(parents_opp)
        if opppstate ≢ nothing
            oppp, state = opppstate
            if (Base.iterate(parents_opp, state) === nothing) && isu₂unrolled(op) == isu₂unrolled(oppp)
                opp = oppp
            end
        end
    end
    isu₁, isu₂ = isunrolled_sym(opp, u₁loopsym, u₂loopsym, vloopsym, ls)
    u = isu₁ ? u₁ : 1
    mvar = Symbol(variable_name(opp, ifelse(isu₂, suffix, -1)), '_', u)
    if all(op.ref.loopedindex)
        # Every index is a loop-induction variable: one (possibly unrolled)
        # store expression suffices.
        inds = unrolledindex(op, ua, mask, inds_calc_by_ptr_offset, ls)
        storeexpr = if reductfunc === Symbol("")
            Expr(:call, lv(:_vstore!), sptr(op), mvar, inds)
        else
            Expr(:call, lv(:_vstore!), lv(reductfunc), sptr(op), mvar, inds)
        end
        add_memory_mask!(storeexpr, op, ua, mask, ls)
        push!(storeexpr.args, falseexpr, aliasexpr, falseexpr, rs)
        push!(q.args, storeexpr)
    else
        parents_op = parents(op)
        data_u₁ = isu₁ & (u₁ > 1)
        indices_u₁ = data_u₁
        if !data_u₁ & (length(parents_op) > 1)
            indices_u₁ = first(isunrolled_sym(op, u₁loopsym, u₂loopsym, vloopsym, ls))
        end
        if indices_u₁
            mvard = Symbol(mvar, "##data##")
            # Only unpack the unrolled data tuple when the stored value itself
            # is u₁-unrolled.
            data_u₁ && push!(q.args, Expr(:(=), mvard, Expr(:call, lv(:data), mvar)))
            sptrsym = sptr!(q, op)
            for u ∈ 1:u₁
                inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, true, u - 1, ls)
                storeexpr = if data_u₁
                    if reductfunc === Symbol("")
                        Expr(:call, lv(:_vstore!), sptrsym, gf(mvard, u), inds)
                    else
                        # BUGFIX: this branch previously referenced `mvaru`, a
                        # name never defined in this function. The stored value
                        # is the `u`th element of the unrolled data tuple, just
                        # as in the non-reduction branch above.
                        Expr(:call, lv(:_vstore!), lv(reductfunc), sptrsym, gf(mvard, u), inds)
                    end
                elseif reductfunc === Symbol("")
                    Expr(:call, lv(:_vstore!), sptrsym, mvar, inds)
                else
                    Expr(:call, lv(:_vstore!), lv(reductfunc), sptrsym, mvar, inds)
                end
                # Per the condition: mask only when the op is vectorized, and
                # either this is the last unrolled iteration or the vectorized
                # loop differs from the u₁-unrolled loop.
                domask = mask && (isvectorized(op) & ((u == u₁) | (vloopsym !== u₁loopsym)))
                add_memory_mask!(storeexpr, op, ua, domask, ls)
                push!(storeexpr.args, falseexpr, aliasexpr, falseexpr, rs)
                push!(q.args, storeexpr)
            end
        else
            inds = mem_offset_u(op, ua, inds_calc_by_ptr_offset, true, 0, ls)
            storeexpr = if reductfunc === Symbol("")
                Expr(:call, lv(:_vstore!), sptr(op), mvar, inds)
            else
                Expr(:call, lv(:_vstore!), lv(reductfunc), sptr(op), mvar, inds)
            end
            add_memory_mask!(storeexpr, op, ua, mask, ls)
            push!(storeexpr.args, falseexpr, aliasexpr, falseexpr, rs)
            push!(q.args, storeexpr)
        end
    end
    nothing
end
|
231 | 241 |
|
232 | 242 | function lower_tiled_store!(
|
|
0 commit comments