|
1 | 1 | using LoopVectorization, OffsetArrays, Test
|
2 | 2 | using LoopVectorization.VectorizationBase: StaticUnitRange
|
3 |
| -T = Float64 |
| 3 | +# T = Float64 |
4 | 4 | # T = Float32
|
5 | 5 |
|
6 | 6 | @testset "OffsetArrays" begin
|
@@ -201,55 +201,61 @@ T = Float64
|
201 | 201 | for T ∈ (Float32, Float64)
|
202 | 202 | @show T, @__LINE__
|
203 | 203 | A = rand(T, 100, 100); At = copy(A');
|
204 |
| - kern = OffsetArray(rand(T, 3, 3), -1:1, -1:1); |
205 |
| - out1 = OffsetArray(view(similar(A, size(A) .+ 32), (1:98) .+ 32, (1:98) .+ 32), 1, 1); # stay away from the edges of A |
206 |
| - # out1 = OffsetArray(similar(A, size(A).-2), 1, 1); # stay away from the edges of A |
207 |
| - out2 = similar(out1); out3 = similar(out1); out4 = similar(out1); |
208 |
| - skern = SizedOffsetMatrix{T,-1,1,-1,1}(parent(kern)); |
| 204 | + for r ∈ (-1:1, -2:2) |
| 205 | + @show r |
| 206 | + fr = first(r); lr = last(r); |
| 207 | + kern = OffsetArray(rand(T, length(r), length(r)), r, r); |
| 208 | + out1 = OffsetArray(view(similar(A, size(A) .+ 32), (1+lr:100-lr) .+ 32, (1+lr:100-lr) .+ 32), lr, lr); # stay away from the edges of A |
| 209 | + # out1 = OffsetArray(similar(A, size(A).-2), 1, 1); # stay away from the edges of A |
| 210 | + out2 = similar(out1); out3 = similar(out1); out4 = similar(out1); |
| 211 | + skern = SizedOffsetMatrix{T,fr,lr,fr,lr}(parent(kern)); |
209 | 212 |
|
210 |
| - old2d!(out1, A, kern); |
211 |
| - avx2d!(out2, A, kern); |
212 |
| - @test out1 ≈ out2 |
| 213 | + old2d!(out1, A, kern); |
| 214 | + avx2d!(out2, A, kern); |
| 215 | + @test out1 ≈ out2 |
213 | 216 |
|
214 |
| - avx2douter!(out3, A, kern); |
215 |
| - @test out1 ≈ out3 |
| 217 | + avx2douter!(out3, A, kern); |
| 218 | + @test out1 ≈ out3 |
216 | 219 |
|
217 |
| - fill!(out2, NaN); avx2d!(out2, A, skern); |
218 |
| - @test out1 ≈ out2 |
| 220 | + fill!(out2, NaN); avx2d!(out2, A, skern); |
| 221 | + @test out1 ≈ out2 |
219 | 222 |
|
220 |
| - fill!(out2, NaN); avx2douter!(out2, At', kern); |
221 |
| - @test out1 ≈ out2 |
| 223 | + fill!(out2, NaN); avx2douter!(out2, At', kern); |
| 224 | + @test out1 ≈ out2 |
222 | 225 |
|
223 |
| - fill!(out2, NaN); avx2douter!(out2', A, kern); |
224 |
| - @test out1 ≈ out2' |
| 226 | + fill!(out2, NaN); avx2douter!(out2', A, kern); |
| 227 | + @test out1 ≈ out2' |
225 | 228 |
|
226 |
| - fill!(out2, NaN); avx2douter!(out2', At', kern); |
227 |
| - @test out1 ≈ out2' |
| 229 | + fill!(out2, NaN); avx2douter!(out2', At', kern); |
| 230 | + @test out1 ≈ out2' |
228 | 231 |
|
229 |
| - fill!(out3, NaN); avx2douter!(out3, A, skern); |
230 |
| - @test out1 ≈ out3 |
| 232 | + fill!(out3, NaN); avx2douter!(out3, A, skern); |
| 233 | + @test out1 ≈ out3 |
231 | 234 |
|
232 |
| - fill!(out3, NaN); avx2dunrolled!(out3, A, skern); |
233 |
| - @test out1 ≈ out3 |
| 235 | + if r == -1:1 |
| 236 | + fill!(out3, NaN); avx2dunrolled!(out3, A, skern); |
| 237 | + @test out1 ≈ out3 |
234 | 238 |
|
235 |
| - fill!(out3, NaN); avx2dunrolled2x2!(out3, A, skern); |
236 |
| - @test out1 ≈ out3 |
| 239 | + fill!(out3, NaN); avx2dunrolled2x2!(out3, A, skern); |
| 240 | + @test out1 ≈ out3 |
237 | 241 |
|
238 |
| - fill!(out3, NaN); avx2dunrolled3x3!(out3, A, skern); |
239 |
| - @test out1 ≈ out3 |
240 |
| - |
241 |
| - @test avxgeneric!(out4, A, kern) ≈ out1 |
242 |
| - fill!(out4, NaN); |
243 |
| - @test avxgeneric!(out4, A, skern) ≈ out1 |
| 242 | + fill!(out3, NaN); avx2dunrolled3x3!(out3, A, skern); |
| 243 | + @test out1 ≈ out3 |
| 244 | + end |
| 245 | + |
| 246 | + @test avxgeneric!(out4, A, kern) ≈ out1 |
| 247 | + fill!(out4, NaN); |
| 248 | + @test avxgeneric!(out4, A, skern) ≈ out1 |
244 | 249 |
|
245 |
| - fill!(out4, NaN); @test avxgeneric2!(out4, A, kern) ≈ out1 |
246 |
| - fill!(out4, NaN); @test avxgeneric2!(out4, A, skern) ≈ out1 |
247 |
| - fill!(out4, NaN); @test avxgeneric2!(out4, At', kern) ≈ out1 |
248 |
| - fill!(out4, NaN); @test avxgeneric2!(out4, At', skern) ≈ out1 |
249 |
| - fill!(out4, NaN); @test avxgeneric2!(out4', A, kern) ≈ out1 |
250 |
| - fill!(out4, NaN); @test avxgeneric2!(out4', A, skern) ≈ out1 |
251 |
| - fill!(out4, NaN); @test avxgeneric2!(out4', At', kern) ≈ out1 |
252 |
| - fill!(out4, NaN); @test avxgeneric2!(out4', At', skern) ≈ out1 |
| 250 | + fill!(out4, NaN); @test avxgeneric2!(out4, A, kern) ≈ out1 |
| 251 | + fill!(out4, NaN); @test avxgeneric2!(out4, A, skern) ≈ out1 |
| 252 | + fill!(out4, NaN); @test avxgeneric2!(out4, At', kern) ≈ out1 |
| 253 | + fill!(out4, NaN); @test avxgeneric2!(out4, At', skern) ≈ out1 |
| 254 | + fill!(out4, NaN); @test avxgeneric2!(out4', A, kern) ≈ out1 |
| 255 | + fill!(out4, NaN); @test avxgeneric2!(out4', A, skern) ≈ out1 |
| 256 | + fill!(out4, NaN); @test avxgeneric2!(out4', At', kern) ≈ out1 |
| 257 | + fill!(out4, NaN); @test avxgeneric2!(out4', At', skern) ≈ out1 |
| 258 | + end |
253 | 259 | end
|
254 | 260 |
|
255 | 261 |
|
|
0 commit comments