@@ -177,21 +177,17 @@ function issue209_noavx(M, G, J, H, B, ϕ)
177
177
end
178
178
using LoopVectorization
179
179
180
"""
    sumdim2_turbo!(r1, r2)

Fill `r1` in place by combining the two slices `r2[k, 1, i, j]` and `r2[k, 2, i, j]`
along the second dimension of `r2`, using a threaded `@turbo` loop.

For every `(i, j)` in the iterated range, the four entries along the first dimension
of `r1` are set to:

- `r1[1, i, j]`: sum of the two slices
- `r1[2, i, j]`: difference (slice 1 minus slice 2)
- `r1[3, i, j]`: product
- `r1[4, i, j]`: quotient (slice 1 divided by slice 2)

# Arguments
- `r1`: destination array, indexed as `r1[k, i, j]`; mutated.
- `r2`: source array, indexed as `r2[k, l, i, j]` with `l ∈ (1, 2)`.

# Returns
`r1`, after it has been overwritten.
"""
function sumdim2_turbo!(r1, r2)
    # `indices((r1, r2), (a, b))` ties axis `a` of `r1` to axis `b` of `r2`, so the loop
    # covers the full shared range instead of a hand-computed sub-range.
    # NOTE(review): per the LoopVectorization docs this also checks that the paired
    # axes agree; confirm against the package version in use.
    @turbo thread=true for j in indices((r1, r2), (3, 4)), i in indices((r1, r2), (2, 3))
        r1[1, i, j] = r2[1, 1, i, j] + r2[1, 2, i, j]
        r1[2, i, j] = r2[2, 1, i, j] - r2[2, 2, i, j]
        r1[3, i, j] = r2[3, 1, i, j] * r2[3, 2, i, j]
        r1[4, i, j] = r2[4, 1, i, j] / r2[4, 2, i, j]
    end
    return r1
end
191
- function r! (r1, r2)
192
- m = size (r1,2 )
193
- n = size (r1,3 )
194
- @inbounds @fastmath for j= 2 : n- 1 , i= 1 : m- 1
189
+ function sumdim2! (r1, r2)
190
+ @inbounds @fastmath for j = indices ((r1,r2),(3 ,4 )), i ∈ indices ((r1,r2),(2 ,3 ))
195
191
r1[1 ,i,j] = r2[1 ,1 ,i,j] + r2[1 ,2 ,i,j]
196
192
r1[2 ,i,j] = r2[2 ,1 ,i,j] - r2[2 ,2 ,i,j]
197
193
r1[3 ,i,j] = r2[3 ,1 ,i,j] * r2[3 ,2 ,i,j]
257
253
258
254
s = Array {Float64} (undef, 4 , 128 , 128 );
259
255
s2 = rand (4 , 2 , 128 , 128 );
260
- @test r_turbo (s, s2) ≈ r (similar (s), s2)
256
+ @test sumdim2_turbo! (s, s2) ≈ sumdim2! (similar (s), s2)
261
257
262
258
end
263
259
0 commit comments