@@ -66,6 +66,8 @@ function use_loop_induct_var!(ls::LoopSet, q::Expr, ar::ArrayReferenceMeta, alla
66
66
end
67
67
isbroadcast = ls. isbroadcast[]
68
68
gespinds = Expr (:tuple )
69
+ offsetprecalc_descript = Expr (:tuple )
70
+ use_offsetprecalc = false
69
71
for i ∈ eachindex (li)
70
72
ii = i + offset
71
73
ind = indices[ii]
@@ -78,9 +80,12 @@ function use_loop_induct_var!(ls::LoopSet, q::Expr, ar::ArrayReferenceMeta, alla
78
80
if (! li[i])
79
81
uliv[i] = 0
80
82
push! (gespinds. args, Expr (:call , lv (:Zero )))
83
+ push! (offsetprecalc_descript. args, 0 )
81
84
elseif isbroadcast || ((isone (ii) && (last (looporder) === ind)) && ! (otherindexunrolled (ls, ind, ar)) || multiple_with_name (vptr (ar), allarrayrefs)) || isstaticloop (getloop (ls, ind))
85
+ # Not doing normal offset indexing
82
86
uliv[i] = - findfirst (isequal (ind), looporder):: Int
83
87
push! (gespinds. args, Expr (:call , lv (:Zero )))
88
+ push! (offsetprecalc_descript. args, 0 ) # not doing offset indexing, so push 0
84
89
else
85
90
uliv[i] = findfirst (isequal (ind), looporder):: Int
86
91
loop = getloop (ls, ind)
@@ -89,9 +94,24 @@ function use_loop_induct_var!(ls::LoopSet, q::Expr, ar::ArrayReferenceMeta, alla
89
94
else
90
95
push! (gespinds. args, Expr (:call , lv (:staticm1 ), loop. startsym))
91
96
end
97
+ if ind === names (ls)[us. vectorizedloopnum]
98
+ push! (offsetprecalc_descript. args, 0 )
99
+ elseif (ind === names (ls)[us. u₁loopnum]) & (us. u₁ > 3 )
100
+ use_offsetprecalc = true
101
+ push! (offsetprecalc_descript. args, us. u₁)
102
+ elseif (ind === names (ls)[us. u₂loopnum]) & (us. u₂ > 3 )
103
+ use_offsetprecalc = true
104
+ push! (offsetprecalc_descript. args, us. u₂)
105
+ else
106
+ push! (offsetprecalc_descript. args, 0 )
107
+ end
92
108
end
93
109
end
94
- push! (q. args, Expr (:(= ), vptr (ar), Expr (:call , lv (:gesp ), vptr (ar), gespinds)))
110
+ if use_offsetprecalc
111
+ push! (q. args, Expr (:(= ), vptr (ar), Expr (:call , lv (:offsetprecalc ), Expr (:call , lv (:gesp ), vptr (ar), gespinds), Expr (:call , Expr (:curly , :Val , offsetprecalc_descript)))))
112
+ else
113
+ push! (q. args, Expr (:(= ), vptr (ar), Expr (:call , lv (:gesp ), vptr (ar), gespinds)))
114
+ end
95
115
uliv
96
116
end
97
117
@@ -197,13 +217,8 @@ function pointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvec
197
217
end
198
218
@show ar, loopsym
199
219
end
200
- # @inline function assume_a_greater_than_b_ret_a(a::Ptr, b::VectorizationBase.AbstractStridedPointer)
201
- # SIMDPirates.assume(pointer(b) < a)
202
- # a
203
- # end
204
- # @inline assume_a_greater_than_b_ret_a(a, b) = a
220
+
205
221
"""
    defpointermax(ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool)

Return the assignment expression
`maxsym(vptr(ar), sub) = pointermax(ls, ar, n, sub, isvectorized)`.

The left-hand side is whatever `maxsym(vptr(ar), sub)` produces; the right-hand
side is whatever `pointermax` returns when given the same arguments unchanged.
"""
function defpointermax(
    ls::LoopSet, ar::ArrayReferenceMeta, n::Int, sub::Int, isvectorized::Bool
)::Expr
    # Evaluate the target first, then the value, mirroring argument order.
    lhs = maxsym(vptr(ar), sub)
    rhs = pointermax(ls, ar, n, sub, isvectorized)
    return Expr(:(=), lhs, rhs)
end
209
224
0 commit comments