@@ -142,45 +142,150 @@ function lower_load_vectorized!(
142
142
end
143
143
nothing
144
144
end
145
"""
    indisvectorized(ls::LoopSet, ind::Symbol, vectorized::Symbol)

Return `true` when some operation in `operations(ls)` has `variable === ind` and
also satisfies `isvectorized`; return `false` otherwise.

The `vectorized` argument is accepted but is not consulted by this function.
"""
function indisvectorized(ls::LoopSet, ind::Symbol, vectorized::Symbol)
    # `any` stops at the first match, mirroring an early `return true`.
    return any(operations(ls)) do candidate
        (candidate.variable === ind) && isvectorized(candidate)
    end
end
151
+
152
+ # function lower_load_for_optranslation!(
153
+ # q::Expr, op::Operation, ls::LoopSet, td::UnrollArgs{Int}, mask::Union{Nothing,Symbol,Unsigned}, translationind::Int
154
+ # )
155
+ # @unpack u₁, u₁loopsym, u₂loopsym, vectorized, u₂max, suffix = td
156
+ # iszero(suffix) || return
157
+
158
+ # gespinds = mem_offset(op, UnrollArgs(td, 0), indices_calculated_by_pointer_offsets(ls, op.ref), false)
159
+ # ptr = vptr(op)
160
+ # gptr = Symbol(ptr, "##GESPED##")
161
+ # for i ∈ eachindex(gespinds.args)
162
+ # if i != translationind
163
+ # gespinds.args[i] = Expr(:call, lv(:extract_data), gespinds.args[i])
164
+ # end
165
+ # end
166
+ # push!(q.args, Expr(:(=), gptr, Expr(:call, lv(:gesp), ptr, gespinds)))
167
+
168
+ # inds = Expr(:tuple)
169
+ # ginds = Expr(:tuple)
170
+ # indices = getindicesonly(op)
171
+
172
+ # for (i,ind) ∈ enumerate(indices)
173
+ # if i == translationind # ind cannot be the translation ind
174
+ # push!(inds.args, Expr(:call, lv(:Zero)))
175
+ # push!(ginds.args, Expr(:call, Expr(:curly, lv(:Static), 1)))
176
+ # elseif (ind === vectorized) || indisvectorized(ls, ind, vectorized)
177
+ # push!(inds.args, _MMind(Expr(:call, lv(:Zero))))
178
+ # push!(ginds.args, Expr(:call, lv(:Zero)))
179
+ # else
180
+ # push!(inds.args, Expr(:call, lv(:Zero)))
181
+ # push!(ginds.args, Expr(:call, lv(:Zero)))
182
+ # end
183
+ # end
184
+ # varbase = variable_name(op, 0)
185
+ # vloadexpr = Expr(:call, lv(:vload), gptr, inds)
186
+ # gespexpr = Expr(:(=), gptr, Expr(:call, lv(:gesp), gptr, ginds))
187
+ # push!(q.args, Expr(:(=), Symbol(varbase, 0), vloadexpr))
188
+
189
+ # for u ∈ 1:u₁-1
190
+ # push!(q.args, gespexpr)
191
+ # push!(q.args, Expr(:(=), Symbol(varbase, u), vloadexpr))
192
+ # end
193
+ # # this takes care of u₂ == 0
194
+ # offset = u₁
195
+ # for u₂ ∈ 1:u₂max-1
196
+ # varold = varbase
197
+ # varbase = variable_name(op, u₂)
198
+ # for u ∈ 0:u₁-2
199
+ # push!(q.args, Expr(:(=), Symbol(varbase, u), Symbol(varold, u + 1)))
200
+ # end
201
+ # push!(q.args, gespexpr)
202
+ # push!(q.args, Expr(:(=), Symbol(varbase, u₁ - 1), vloadexpr))
203
+ # offset += 1
204
+ # end
205
+ # nothing
206
+ # end
207
+
208
+
209
"""
    lower_load_for_optranslation!(q, op, ls, td, mask, translationind)

Append to `q.args` the load statements for `op`, covering every `u₂` slot in a
single call.

Nothing is emitted unless `td.suffix` is zero. When it is zero, the function:

1. Builds the offset expression with `mem_offset`, wraps every entry other than
   the one at `translationind` in an `extract_data` call, and emits an
   assignment of `gesp(ptr, offsets)` to a derived pointer symbol.
2. Builds an index tuple holding `Static{0}()` at `translationind`,
   `_MMind(Zero())` for indices that are `vectorized` (or for which
   `indisvectorized` is true), and `Zero()` elsewhere.
3. Emits `u₁` `vload`s for the `u₂ == 0` variables, using `Static{u}()` at
   `translationind` for `u = 0, …, u₁ - 1`.
4. For each `u₂ ∈ 1:u₂max-1`, assigns the first `u₁ - 1` variables from the
   previous slot's variables shifted by one, then emits a single new `vload`
   at static offset `u₁ + u₂ - 1`.

`mask` is accepted but is not used in this function. Returns `nothing`.
"""
function lower_load_for_optranslation!(
    q::Expr, op::Operation, ls::LoopSet, td::UnrollArgs{Int},
    mask::Union{Nothing,Symbol,Unsigned}, translationind::Int
)
    @unpack u₁, vectorized, u₂max, suffix = td
    # All work happens on the zero-suffix call; any other suffix is a no-op.
    iszero(suffix) || return

    stmts = q.args

    # Pointer offset by the current indices; only the translation index is kept
    # as-is, the remaining entries are passed through `extract_data`.
    offsets = mem_offset(
        op, UnrollArgs(td, 0), indices_calculated_by_pointer_offsets(ls, op.ref), false
    )
    baseptr = vptr(op)
    shiftedptr = Symbol(baseptr, " ##GESPED##")
    for (i, arg) ∈ enumerate(offsets.args)
        i == translationind && continue
        offsets.args[i] = Expr(:call, lv(:extract_data), arg)
    end
    push!(stmts, Expr(:(=), shiftedptr, Expr(:call, lv(:gesp), baseptr, offsets)))

    # `Static{k}()` call expression used at the translation index.
    staticoffset(k) = Expr(:call, Expr(:curly, lv(:Static), k))

    # Index tuple relative to the shifted pointer.
    loadinds = Expr(:tuple)
    for (i, ind) ∈ enumerate(getindicesonly(op))
        indexpr = if i == translationind
            staticoffset(0)
        elseif (ind === vectorized) || indisvectorized(ls, ind, vectorized)
            _MMind(Expr(:call, lv(:Zero)))
        else
            Expr(:call, lv(:Zero))
        end
        push!(loadinds.args, indexpr)
    end

    # `dest = vload(shiftedptr, <snapshot of loadinds>)`; the tuple is copied so
    # later mutation of `loadinds` does not alter statements already emitted.
    loadinto(dest) = Expr(:(=), dest, Expr(:call, lv(:vload), shiftedptr, copy(loadinds)))

    # u₂ == 0: one load per u₁ slot.
    current = variable_name(op, 0)
    push!(stmts, loadinto(Symbol(current, 0)))
    for u ∈ 1:u₁-1
        loadinds.args[translationind] = staticoffset(u)
        push!(stmts, loadinto(Symbol(current, u)))
    end

    # u₂ ≥ 1: reuse the previous slot's values shifted by one, load one new value.
    for u₂ ∈ 1:u₂max-1
        previous = current
        current = variable_name(op, u₂)
        for u ∈ 1:u₁-1
            push!(stmts, Expr(:(=), Symbol(current, u - 1), Symbol(previous, u)))
        end
        loadinds.args[translationind] = staticoffset(u₁ + u₂ - 1)
        push!(stmts, loadinto(Symbol(current, u₁ - 1)))
    end
    return nothing
end
145
258
146
259
# TODO: this code should be rewritten to be more "orthogonal", so that we're just combining separate pieces.
147
260
# Using sentinel values (eg, T = -1 for non tiling) in part to avoid recompilation.
148
261
function lower_load! (
149
262
q:: Expr , op:: Operation , ls:: LoopSet , td:: UnrollArgs , mask:: Union{Nothing,Symbol,Unsigned} = nothing
150
263
)
151
264
@unpack u₁, u₁loopsym, u₂loopsym, vectorized, suffix = td
152
- if ! isnothing (suffix) && suffix > 0
265
+ if ! isnothing (suffix) && ls . loadelimination[]
153
266
istr, ispl = isoptranslation (ls, op, UnrollSymbols (u₁loopsym, u₂loopsym, vectorized))
154
- if istr && ispl
155
- varnew = variable_name (op, suffix)
156
- varold = variable_name (op, suffix - 1 )
157
- for u ∈ 0 : u₁- 2
158
- push! (q. args, Expr (:(= ), Symbol (varnew, u), Symbol (varold, u + 1 )))
159
- end
160
- umin = u₁ - 1
161
- elseif u₂loopsym != = vectorized
162
- mno, id = maxnegativeoffset (ls, op, u₂loopsym)
163
- if - suffix < mno < 0
164
- varnew = variable_name (op, suffix)
165
- varold = variable_name (operations (ls)[id], suffix + mno)
166
- opold = operations (ls)[id]
167
- if isu₁unrolled (op)
168
- for u ∈ 0 : u₁- 1
169
- push! (q. args, Expr (:(= ), Symbol (varnew, u), Symbol (varold, u)))
267
+ if ! iszero (istr) & ispl
268
+ lower_load_for_optranslation! (q, op, ls, td, mask, istr)
269
+ elseif suffix > 0
270
+ if u₂loopsym != = vectorized
271
+ mno, id = maxnegativeoffset (ls, op, u₂loopsym)
272
+ if - suffix < mno < 0
273
+ varnew = variable_name (op, suffix)
274
+ varold = variable_name (operations (ls)[id], suffix + mno)
275
+ opold = operations (ls)[id]
276
+ if isu₁unrolled (op)
277
+ for u ∈ 0 : u₁- 1
278
+ push! (q. args, Expr (:(= ), Symbol (varnew, u), Symbol (varold, u)))
279
+ end
280
+ else
281
+ push! (q. args, Expr (:(= ), varnew, varold))
170
282
end
171
- else
172
- push! (q. args, Expr (:(= ), varnew, varold))
283
+ return
173
284
end
174
- return
175
- else
176
- umin = 0
177
285
end
178
- else
179
- umin = 0
180
286
end
181
- else
182
- umin = 0
183
287
end
288
+ umin = 0
184
289
if isvectorized (op)
185
290
lower_load_vectorized! (q, ls, op, td, mask, umin)
186
291
else
0 commit comments