@@ -188,19 +188,20 @@ using LoopVectorization.VectorizationBase: StaticUnitRange
188
188
out
189
189
end
190
190
# avxgeneric2!(out, A, kern): for every Cartesian index I of `out`, start from
# zero(eltype(out)), accumulate A[I+J] * kern[J] over every Cartesian index J of
# `kern`, store the sum in out[I], and finally return `out`.
# The outer loop over I is annotated with @avx; the inner loop over J is nested inside it.
# NOTE(review): in this hunk the removed (-) and added (+) copies of the body contain
# the same tokens, so the change appears to be whitespace/indentation only — confirm
# against the raw diff.
function avxgeneric2! (out, A, kern)
191
- @avx for I in CartesianIndices (out)
192
- tmp = zero (eltype (out))
193
- for J in CartesianIndices (kern)
194
- tmp += A[I+ J]* kern[J]
195
- end
196
- out[I] = tmp
197
- end
198
- out
199
- end
191
+ @avx for I in CartesianIndices (out)
192
+ tmp = zero (eltype (out))
193
+ for J in CartesianIndices (kern)
194
+ tmp += A[I+ J]* kern[J]
195
+ end
196
+ out[I] = tmp
197
+ end
198
+ out
199
+ end
200
200
201
201
for T ∈ (Float32, Float64)
202
202
@show T, @__LINE__
203
203
Abase = fill (T (NaN ), 200 , 200 );
204
+ # Out-of-bounds reads load NaNs, which poison the results and lead to test failure.
204
205
A = view (Abase, 51 : 150 , 51 : 150 );
205
206
A .= rand .();
206
207
Atbase = copy (Abase' );
@@ -209,56 +210,57 @@ using LoopVectorization.VectorizationBase: StaticUnitRange
209
210
@show r
210
211
fr = first (r); lr = last (r);
211
212
kern = OffsetArray (rand (T, length (r), length (r)), r, r);
212
- out1 = OffsetArray (view (similar (A, size (A) .+ 32 ), (1 + lr: 100 - lr) .+ 32 , (1 + lr: 100 - lr) .+ 32 ), lr, lr); # stay away from the edges of A
213
+ # We test parent equality so that an accidental out-of-bounds write leads to test failure.
214
+ out1 = OffsetArray (view (fill (T (- 123456.789 ), size (A) .+ 32 ), (1 + lr: 100 - lr) .+ 32 , (1 + lr: 100 - lr) .+ 32 ), lr, lr); # stay away from the edges of A
213
215
# out1 = OffsetArray(similar(A, size(A).-2), 1, 1); # stay away from the edges of A
214
- out2 = similar (out1); out3 = similar (out1); out4 = similar (out1);
216
+ out2 = deepcopy (out1); out3 = deepcopy (out1); out4 = deepcopy (out1);
215
217
skern = SizedOffsetMatrix {T,fr,lr,fr,lr} (parent (kern));
216
218
217
219
old2d! (out1, A, kern);
218
220
avx2d! (out2, A, kern);
219
- @test out1 ≈ out2
221
+ @test parent ( out1) ≈ parent ( out2)
220
222
221
223
avx2douter! (out3, A, kern);
222
- @test out1 ≈ out3
224
+ @test parent ( out1) ≈ parent ( out3)
223
225
224
226
fill! (out2, NaN ); avx2d! (out2, A, skern);
225
- @test out1 ≈ out2
227
+ @test parent ( out1) ≈ parent ( out2)
226
228
227
229
fill! (out2, NaN ); avx2douter! (out2, At' , kern);
228
- @test out1 ≈ out2
230
+ @test parent ( out1) ≈ parent ( out2)
229
231
230
232
fill! (out2, NaN ); avx2douter! (out2' , A, kern);
231
- @test out1 ≈ out2'
233
+ @test parent ( out1) ≈ parent ( out2) '
232
234
233
235
fill! (out2, NaN ); avx2douter! (out2' , At' , kern);
234
- @test out1 ≈ out2'
236
+ @test parent ( out1) ≈ parent ( out2) '
235
237
236
238
fill! (out3, NaN ); avx2douter! (out3, A, skern);
237
- @test out1 ≈ out3
239
+ @test parent ( out1) ≈ parent ( out3)
238
240
239
241
if r == - 1 : 1
240
242
fill! (out3, NaN ); avx2dunrolled! (out3, A, skern);
241
- @test out1 ≈ out3
243
+ @test parent ( out1) ≈ parent ( out3)
242
244
243
245
fill! (out3, NaN ); avx2dunrolled2x2! (out3, A, skern);
244
- @test out1 ≈ out3
246
+ @test parent ( out1) ≈ parent ( out3)
245
247
246
248
fill! (out3, NaN ); avx2dunrolled3x3! (out3, A, skern);
247
- @test out1 ≈ out3
249
+ @test parent ( out1) ≈ parent ( out3)
248
250
end
249
251
250
- @test avxgeneric! (out4, A, kern) ≈ out1
252
+ @test parent ( avxgeneric! (out4, A, kern)) ≈ parent ( out1)
251
253
fill! (out4, NaN );
252
- @test avxgeneric! (out4, A, skern) ≈ out1
254
+ @test parent ( avxgeneric! (out4, A, skern)) ≈ parent ( out1)
253
255
254
- fill! (out4, NaN ); @test avxgeneric2! (out4, A, kern) ≈ out1
255
- fill! (out4, NaN ); @test avxgeneric2! (out4, A, skern) ≈ out1
256
- fill! (out4, NaN ); @test avxgeneric2! (out4, At' , kern) ≈ out1
257
- fill! (out4, NaN ); @test avxgeneric2! (out4, At' , skern) ≈ out1
258
- fill! (out4, NaN ); @test avxgeneric2! (out4' , A, kern) ≈ out1
259
- fill! (out4, NaN ); @test avxgeneric2! (out4' , A, skern) ≈ out1
260
- fill! (out4, NaN ); @test avxgeneric2! (out4' , At' , kern) ≈ out1
261
- fill! (out4, NaN ); @test avxgeneric2! (out4' , At' , skern) ≈ out1
256
+ fill! (out4, NaN ); @test parent ( avxgeneric2! (out4, A, kern)) ≈ parent ( out1)
257
+ fill! (out4, NaN ); @test parent ( avxgeneric2! (out4, A, skern)) ≈ parent ( out1)
258
+ fill! (out4, NaN ); @test parent ( avxgeneric2! (out4, At' , kern)) ≈ parent ( out1)
259
+ fill! (out4, NaN ); @test parent ( avxgeneric2! (out4, At' , skern)) ≈ parent ( out1)
260
+ fill! (out4, NaN ); @test parent ( avxgeneric2! (out4' , A, kern)' ) ' ≈ parent ( out1)
261
+ fill! (out4, NaN ); @test parent ( avxgeneric2! (out4' , A, skern)' ) ' ≈ parent ( out1)
262
+ fill! (out4, NaN ); @test parent ( avxgeneric2! (out4' , At' , kern)' ) ' ≈ parent ( out1)
263
+ fill! (out4, NaN ); @test parent ( avxgeneric2! (out4' , At' , skern)' ) ' ≈ parent ( out1)
262
264
end
263
265
end
264
266
0 commit comments