@@ -48,15 +48,15 @@ T = Float64
48
48
# end);
49
49
# lsq2d = LoopVectorization.LoopSet(q2d); LoopVectorization.choose_order(lsq2d)
50
50
51
- oq2 = :(for j in rng2, i in rng1
52
- tmp = zero (eltype (out))
53
- for jk in - 1 : 1 , ik in - 1 : 1
54
- tmp += A[i+ ik,j+ jk]* kern[ik,jk]
55
- end
56
- out[i,j] = tmp
57
- end );
58
- lsoq = LoopVectorization. LoopSet (oq2);
59
- LoopVectorization. choose_order (lsoq)
51
+ # oq2 = :(for j in rng2, i in rng1
52
+ # tmp = zero(eltype(out))
53
+ # for jk in -1:1, ik in -1:1
54
+ # tmp += A[i+ik,j+jk]*kern[ik,jk]
55
+ # end
56
+ # out[i,j] = tmp
57
+ # end);
58
+ # lsoq = LoopVectorization.LoopSet(oq2);
59
+ # LoopVectorization.choose_order(lsoq)
60
60
61
61
function avx2d! (out:: AbstractMatrix , A:: AbstractMatrix , kern)
62
62
rng1k, rng2k = axes (kern)
@@ -141,19 +141,20 @@ T = Float64
141
141
# lsuq = LoopVectorization.LoopSet(macroexpand(Base, uq));
142
142
# LoopVectorization.choose_order(lsuq)
143
143
144
- # out = out1;
145
- # z = zero(eltype(out));
146
- # R=CartesianIndices(out);
147
- # Rk = CartesianIndices(kern);
148
- # lsgeneric = LoopVectorization.@avx_debug for I in R
149
- # tmp = z
150
- # for J in Rk
151
- # tmp += A[I+J]*kern[J]
152
- # end
153
- # out[I] = tmp
154
- # end;
155
- # LoopVectorization.choose_order(lsgeneric)
156
- # out = out1;
144
+ # using LoopVectorization, OffsetArrays
145
+ # T = Float64
146
+ # A = rand(T, 100, 100);
147
+ # kern = OffsetArray(rand(T, 3, 3), -1:1, -1:1);
148
+ # out = OffsetArray(similar(A, size(A).-2), 1, 1); # stay away from the edges of A
149
+ # lsgeneric = LoopVectorization.@avx_debug for I in CartesianIndices(out)
150
+ # tmp = zero(eltype(out))
151
+ # for J in CartesianIndices(kern)
152
+ # tmp += A[I+J]*kern[J]
153
+ # end
154
+ # out[I] = tmp
155
+ # end;
156
+ # LoopVectorization.choose_order(lsgeneric)
157
+ # # out = out1;
157
158
# lsgenerics = LoopVectorization.@avx_debug for I in CartesianIndices(out)
158
159
# tmp = zero(eltype(out))
159
160
# for J in CartesianIndices(skern)
@@ -194,7 +195,7 @@ T = Float64
194
195
195
196
for T ∈ (Float32, Float64)
196
197
@show T, @__LINE__
197
- A = rand (T, 100 , 100 );
198
+ A = rand (T, 100 , 100 ); At = copy (A ' );
198
199
kern = OffsetArray (rand (T, 3 , 3 ), - 1 : 1 , - 1 : 1 );
199
200
skern = SizedOffsetMatrix {T,-1,1,-1,1} (parent (kern));
200
201
out1 = OffsetArray (similar (A, size (A).- 2 ), 1 , 1 ); # stay away from the edges of A
@@ -210,6 +211,15 @@ T = Float64
210
211
fill! (out2, NaN ); avx2d! (out2, A, skern);
211
212
@test out1 ≈ out2
212
213
214
+ fill! (out2, NaN ); avx2d! (out2, At' , skern);
215
+ @test out1 ≈ out2
216
+
217
+ fill! (out2, NaN ); avx2d! (out2' , A, skern);
218
+ @test out1 ≈ out2'
219
+
220
+ fill! (out2, NaN ); avx2d! (out2' , At' , skern);
221
+ @test out1 ≈ out2'
222
+
213
223
fill! (out3, NaN ); avx2douter! (out3, A, skern);
214
224
@test out1 ≈ out3
215
225
0 commit comments