Skip to content

Commit 527dffc

Browse files
timholychriselrod
andauthored
Add more CartesianIndex tests (#336)
This mimics ImageFiltering's separable kernel filtering algorithm. Co-authored-by: Chris Elrod <[email protected]>
1 parent 03834d9 commit 527dffc

File tree

1 file changed

+72
-15
lines changed

1 file changed

+72
-15
lines changed

test/miscellaneous.jl

Lines changed: 72 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
using LoopVectorization
22
using LinearAlgebra
3+
using OffsetArrays
34
using Test
45

6+
if !isdefined(@__MODULE__, Symbol("@_avx"))
7+
include("testsetup.jl")
8+
end
9+
510
@testset "Miscellaneous" begin
611
# T = Float32
712
# Unum, Tnum = LoopVectorization.register_count() == 16 ? (1, 6) : (1, 8)
@@ -48,7 +53,7 @@ using Test
4853
s += t * y[n]
4954
end);
5055
ls = LoopVectorization.loopset(q);
51-
56+
5257
function dot3avx24(x, A, y)
5358
M, N = size(A)
5459
s = zero(promote_type(eltype(x), eltype(A), eltype(y)))
@@ -157,7 +162,7 @@ using Test
157162
# @test LoopVectorization.choose_order(lsvar) == (Symbol[:j,:i], :j, Symbol("##undefined##"), :j, 8, -1)
158163
# @test LoopVectorization.choose_order(lsvar) == (Symbol[:j,:i], :j, :i, :j, 2, 6)
159164
# end
160-
165+
161166
function myvar!(s², A, x̄)
162167
@.= 0
163168
@inbounds for i 1:size(A,2)
@@ -197,7 +202,7 @@ using Test
197202
Z[i, j] = acc
198203
end
199204
end
200-
function setcolumstovectorplus100avx!(Z::AbstractArray{T}, A) where {T}
205+
function setcolumstovectorplus100avx!(Z::AbstractArray{T}, A) where {T}
201206
@turbo for i = axes(A,1), j = axes(Z,2)
202207
acc = zero(T)
203208
acc = acc + A[i] + (26 + 74)
@@ -508,7 +513,7 @@ using Test
508513
function test_bit_shiftavx(counter)
509514
accu = zero(first(counter))
510515
@turbo for i eachindex(counter)
511-
accu += counter[i] << (-3 * 4 + 13)
516+
accu += counter[i] << (-3 * 4 + 13)
512517
end
513518
accu
514519
end
@@ -892,7 +897,7 @@ end
892897
setcolumstovectorplus100!(A, x)
893898
setcolumstovectorplus100avx!(A2, x)
894899
@test A == A2
895-
900+
896901
maxdeg = 20; nbasis = 1_000; dim = 15;
897902
r = T == Float32 ? (Int32(1):Int32(maxdeg+1)) : (1:maxdeg+1)
898903
basis = rand(r, (dim, nbasis));
@@ -1033,7 +1038,7 @@ end
10331038
@test c_re_1 c_re_2
10341039

10351040
@test loopinductvardivision(X1) loopinductvardivisionavx(X2)
1036-
1041+
10371042
mh = (
10381043
Wt_D_W = Matrix{T}(undef, 181, 181),
10391044
Wt = rand(T, 181, 191),
@@ -1050,7 +1055,7 @@ end
10501055
@test maxavx!(R, Q, true) == max.(vec(maximum(Q, dims=(2,3))), Rc)
10511056

10521057
@test manyreturntest(Q) manyreturntestavx(Q)
1053-
1058+
10541059
U0 = randn(T, 15, 17); E0 = randn(T, 17);
10551060
U1, E1 = splitintonoloop_reference(copy(U0), copy(E0));
10561061
U2, E2 = splitintonoloop(copy(U0), copy(E0));
@@ -1064,7 +1069,7 @@ end
10641069
@test all(isequal(81), powcseliteral!(E0))
10651070
@test all(isequal(81), powcsesymbol!(E3))
10661071

1067-
1072+
10681073
@test isapprox(
10691074
maybe_const_issue144!(zeros(T, 3,4), (value=one(T),), collect(reshape(1:12, 3,4)), ones(T, 4)),
10701075
maybe_const_issue144_avx!(zeros(T,3,4), (value=one(T),), collect(reshape(1:12, 3,4)), ones(T,4)),
@@ -1144,13 +1149,48 @@ end
11441149
s
11451150
end
11461151

1147-
for T (Float32, Float64)
1152+
function smoothdim_kernel_tile!(out, z, src::AbstractArray, kernel::AbstractVector, Rpre::CartesianIndices, axout_tile, Rpost::CartesianIndices)
1153+
# z is an "overflow-safe zero". This is needed in cases where both `src` and `kernel` have eltypes vulnerable to arithmetic overflow.
1154+
axkernel = axes(kernel, 1)
1155+
for Ipost in Rpost
1156+
for i in axout_tile
1157+
for Ipre in Rpre
1158+
tmp = convert(eltype(out), z)
1159+
for j in axkernel
1160+
tmp += oftype(z, src[Ipre,i+j,Ipost])*kernel[j]
1161+
end
1162+
out[Ipre,i,Ipost] = tmp
1163+
end
1164+
end
1165+
end
1166+
out
1167+
end
1168+
function smoothdim_kernel_tile_avx!(out, z, src::AbstractArray, kernel::AbstractVector, Rpre::CartesianIndices, axout_tile, Rpost::CartesianIndices)
1169+
axkernel = axes(kernel, 1)
1170+
zout = convert(eltype(out), z)
1171+
for Ipost in Rpost
1172+
for i in axout_tile
1173+
@turbo for Ipre in Rpre
1174+
tmp = zout
1175+
# tmp = convert(eltype(out), z) # failing to hoist this leads to an "UndefVarError: tmp not defined"
1176+
for j in axkernel
1177+
tmp += oftype(z, src[Ipre,i+j,Ipost])*kernel[j]
1178+
end
1179+
out[Ipre,i,Ipost] = tmp
1180+
end
1181+
end
1182+
end
1183+
out
1184+
end
1185+
1186+
1187+
for T (UInt8,Float32, Float64)
11481188
@testset "Mixed CartesianIndex/Int indexing" begin
11491189
@show T, @__LINE__
11501190
# A demo similar to the exponential filtering demo from https://julialang.org/blog/2016/02/iteration/,
11511191
# but with no loop-carried dependency.
11521192

1153-
# s = dest1;
1193+
# s = dest1;
11541194
# ifirst, ilast = first(axes(x, d)), last(axes(x, d))
11551195
# ls = LoopVectorization.@turbo_debug for Ipost in Rpost, i = ifirst:ilast, Ipre in Rpre
11561196
# xi = x[Ipre, i, Ipost]
@@ -1160,8 +1200,8 @@ end
11601200
# LoopVectorization.choose_order(ls);
11611201

11621202
M = 11;
1163-
x = rand(M,M,M,M,M);
1164-
dest1, dest2 = similar(x), similar(x);
1203+
x = rand(T,M,M,M,M,M);
1204+
dest1, dest2 = similar(x,float(T)), similar(x,float(T));
11651205
α = 0.3
11661206
for d = 1:ndims(x)
11671207
# @show d
@@ -1173,11 +1213,28 @@ end
11731213
fill!(dest2, NaN); smoothdim_ifelse_avx!(dest2, x, α, Rpre, axes(x, d), Rpost);
11741214
@test dest1 dest2
11751215
end
1216+
1217+
# Mimic of ImageFiltering/ArrayFiltering's separable filters
1218+
σ = 3
1219+
ax = OffsetArray(-6:6, -6:6)
1220+
kernel = float(T)[exp(-i^2/(2*σ^2)) for i in ax] # 1-d Gaussian kernel
1221+
srcax = 1+firstindex(ax):M+lastindex(ax)
1222+
Mpad = length(srcax)
1223+
src = OffsetArray(rand(T,Mpad,Mpad,Mpad,Mpad,Mpad),srcax,srcax,srcax,srcax,srcax)
1224+
dest1 = Array{float(T),5}(undef,M,M,M,M,M)
1225+
dest2 = similar(dest1)
1226+
for d = 1:ndims(src)
1227+
Rpre = CartesianIndices(axes(dest1)[1:d-1]);
1228+
Rpost = CartesianIndices(axes(dest1)[d+1:end]);
1229+
smoothdim_kernel_tile!( dest1, float(zero(T)), src, kernel, Rpre, axes(dest1, d), Rpost);
1230+
smoothdim_kernel_tile_avx!(dest2, float(zero(T)), src, kernel, Rpre, axes(dest2, d), Rpost);
1231+
@test dest1 dest2
1232+
end
11761233
end
11771234
end
11781235

11791236

1180-
function mul1!(y::Vector{T}, A::Matrix{UInt8}, x::Vector{T}) where T
1237+
function mul1!(y::Vector{T}, A::Matrix{UInt8}, x::Vector{T}) where T
11811238
packedstride = size(A, 1)
11821239
m, n = size(A)
11831240
@turbo for j eachindex(x)
@@ -1190,7 +1247,7 @@ end
11901247
end
11911248
y
11921249
end
1193-
function mul2!(y::Vector{T}, A::Matrix{UInt8}, x::Vector{T}) where T
1250+
function mul2!(y::Vector{T}, A::Matrix{UInt8}, x::Vector{T}) where T
11941251
packedstride = size(A, 1)
11951252
m, n = size(A)
11961253
for j eachindex(x)
@@ -1318,7 +1375,7 @@ end
13181375
A0 = Vector{Float64}(undef, i-1); A1 = similar(A0);
13191376
@test issue_257!(A0,G) issue_257_avx!(A1,G)
13201377
end
1321-
1378+
13221379
end
13231380

13241381

0 commit comments

Comments
 (0)