@@ -4,14 +4,14 @@ function vmap_quote(N, ::Type{T}) where {T}
4
4
val = Expr (:call , Expr (:curly , :Val , W))
5
5
q = Expr (:block , Expr (:(= ), :M , Expr (:call , :length , :dest )), Expr (:(= ), :vdest , Expr (:call , :pointer , :dest )), Expr (:(= ), :m , 0 ))
6
6
fcall = Expr (:call , :f )
7
- loopbody = Expr (:block , Expr (:call , :vstore! , :vdest , fcall , :m ), Expr (:(+= ), :m , W))
7
+ loopbody = Expr (:block , Expr (:call , :vstore! , Expr ( :call , :gep , :vdest , :m ), fcall ), Expr (:(+= ), :m , W))
8
8
fcallmask = Expr (:call , :f )
9
- bodymask = Expr (:block , Expr (:(= ), :__mask__ , Expr (:call , :mask , val, Expr (:call , :& , :M , W- 1 ))), Expr (:call , :vstore! , :vdest , fcallmask , :m , :__mask__ ))
9
+ bodymask = Expr (:block , Expr (:(= ), :__mask__ , Expr (:call , :mask , val, Expr (:call , :& , :M , W- 1 ))), Expr (:call , :vstore! , Expr ( :call , :gep , :vdest , :m ), fcallmask , :__mask__ ))
10
10
for n ∈ 1 : N
11
11
arg_n = Symbol (:varg_ ,n)
12
12
push! (q. args, Expr (:(= ), arg_n, Expr (:macrocall , Symbol (" @inbounds" ), LineNumberNode (@__LINE__ ,Symbol (@__FILE__ )), Expr (:call , :pointer , Expr (:ref , :args , n)))))
13
- push! (fcall. args, Expr (:call , :vload , val, arg_n, :m ))
14
- push! (fcallmask. args, Expr (:call , :vload , val, arg_n, :m , :__mask__ ))
13
+ push! (fcall. args, Expr (:call , :vload , val, Expr ( :call , :gep , arg_n, :m ) ))
14
+ push! (fcallmask. args, Expr (:call , :vload , val, Expr ( :call , :gep , arg_n, :m ) , :__mask__ ))
15
15
end
16
16
loop = Expr (:for , Expr (:(= ), :_ , Expr (:call , :(:), 0 , Expr (:call , :- , Expr (:call , :(>>> ), :M , Wshift), 1 ))), loopbody)
17
17
push! (q. args, loop)
@@ -114,17 +114,17 @@ function vmapnt!(f::F, y::AbstractVector{T}, args::Vararg{<:Any,A}) where {F,T,A
114
114
W = VectorizationBase. pick_vector_width (T)
115
115
V = VectorizationBase. pick_vector_width_val (T)
116
116
while i < N - ((W << 2 ) - 1 )
117
- vstorent! (ptry, extract_data (f (vload .(V, ptrargs, i)... )), i ); i += W
118
- vstorent! (ptry, extract_data (f (vload .(V, ptrargs, i)... )), i ); i += W
119
- vstorent! (ptry, extract_data (f (vload .(V, ptrargs, i)... )), i ); i += W
120
- vstorent! (ptry, extract_data (f (vload .(V, ptrargs, i)... )), i ); i += W
117
+ vstorent! (gep ( ptry, i), extract_data (f (vload .(V, gep .( ptrargs, i)) ... ))); i += W
118
+ vstorent! (gep ( ptry, i), extract_data (f (vload .(V, gep .( ptrargs, i)) ... ))); i += W
119
+ vstorent! (gep ( ptry, i), extract_data (f (vload .(V, gep .( ptrargs, i)) ... ))); i += W
120
+ vstorent! (gep ( ptry, i), extract_data (f (vload .(V, gep .( ptrargs, i)) ... ))); i += W
121
121
end
122
122
while i < N - (W - 1 ) # stops at 16 when
123
- vstorent! (ptry, extract_data (f (vload .(V, ptrargs, i)... )), i ); i += W
123
+ vstorent! (gep ( ptry, i), extract_data (f (vload .(V, gep .( ptrargs, i)) ... ))); i += W
124
124
end
125
- if i < N
125
+ if i < N
126
126
m = mask (T, N & (W - 1 ))
127
- vstore! (ptry, extract_data (f (vload .(V, ptrargs, i, m)... )), i , m)
127
+ vstore! (gep ( ptry, i), extract_data (f (vload .(V, gep .( ptrargs, i) , m)... )), m)
128
128
end
129
129
y
130
130
end
@@ -143,18 +143,18 @@ function vmapntt!(f::F, y::AbstractVector{T}, args::Vararg{<:Any,A}) where {F,T,
143
143
Niter = N >>> Wsh
144
144
Base. Threads. @threads for j ∈ 0 : Niter- 1
145
145
i = j << Wsh
146
- vstorent! (ptry, extract_data (f (vload .(V, ptrargs, i)... )), i ); i += W
147
- vstorent! (ptry, extract_data (f (vload .(V, ptrargs, i)... )), i ); i += W
148
- vstorent! (ptry, extract_data (f (vload .(V, ptrargs, i)... )), i ); i += W
149
- vstorent! (ptry, extract_data (f (vload .(V, ptrargs, i)... )), i )
146
+ vstorent! (gep ( ptry, i), extract_data (f (vload .(V, gep .( ptrargs, i)) ... ))); i += W
147
+ vstorent! (gep ( ptry, i), extract_data (f (vload .(V, gep .( ptrargs, i)) ... ))); i += W
148
+ vstorent! (gep ( ptry, i), extract_data (f (vload .(V, gep .( ptrargs, i)) ... ))); i += W
149
+ vstorent! (gep ( ptry, i), extract_data (f (vload .(V, gep .( ptrargs, i)) ... )))
150
150
end
151
151
ii = Niter << Wsh
152
152
while ii < N - (W - 1 ) # stops at 16 when
153
- vstorent! (ptry, extract_data (f (vload .(V, ptrargs, ii)... )), ii ); ii += W
153
+ vstorent! (gep ( ptry, ii), extract_data (f (vload .(V, gep .( ptrargs, ii)) ... ))); ii += W
154
154
end
155
155
if ii < N
156
156
m = mask (T, N & (W - 1 ))
157
- vstore! (ptry, extract_data (f (vload .(V, ptrargs, ii, m)... )), ii , m)
157
+ vstore! (gep ( ptry, ii), extract_data (f (vload .(V, gep .( ptrargs, ii) , m)... )), m)
158
158
end
159
159
y
160
160
end
0 commit comments