Skip to content

Commit f702c4a

Browse files
committed
WIP: support mixed Int/CartesianIndex indexing
ImageFiltering contains optimizations for separable filters: if the size along each dimension is `n[i]`, then per-pixel a fully-separable filter has cost `O(n[1] + ... + n[d])`, where as a non-separable filter has cost `O(n[1] * ... * n[d])`. Consequently this can be a very large savings. This PR aims to support such operations by extending support for the exponential-filter demo in https://julialang.org/blog/2016/02/iteration/.
1 parent 6967c61 commit f702c4a

File tree

2 files changed

+86
-23
lines changed

2 files changed

+86
-23
lines changed

src/reconstruct_loopset.jl

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
const NOpsType = Union{Int,Vector{Int}}
2+
13
function Loop(ls::LoopSet, ex::Expr, sym::Symbol, ::Type{<:AbstractUnitRange})
24
ssym = String(sym)
35
start = gensym(ssym*"_loopstart"); stop = gensym(ssym*"_loopstop"); loopsym = gensym(ssym * "_loop")
@@ -54,7 +56,7 @@ end
5456

5557
function ArrayReferenceMeta(
5658
ls::LoopSet, @nospecialize(ar::ArrayRefStruct), arraysymbolinds::Vector{Symbol},
57-
opsymbols::Vector{Symbol}, nopsv::Vector{Int}, expandedv::Vector{Bool}
59+
opsymbols::Vector{Symbol}, nopsv::Vector{NOpsType}, expandedv::Vector{Bool}
5860
)
5961
index_types = ar.index_types
6062
indices = ar.indices
@@ -73,7 +75,14 @@ function ArrayReferenceMeta(
7375
elseif index_types == ComputedIndex
7476
opsym = opsymbols[ind]
7577
if expandedv[ind]
76-
for j 0:nopsv[ind]-1
78+
nops = nopsv[ind]
79+
if isa(nops, Vector)
80+
n = first(nops)
81+
if all(isequal(n), nops)
82+
nops = n
83+
end
84+
end
85+
for j 0:nops-1
7786
pushfirst!(index_vec, expandedopname(opsym, j))
7887
pushfirst!(loopedindex, false)
7988
end
@@ -144,7 +153,7 @@ function add_mref!(ls::LoopSet, ar::ArrayReferenceMeta, i::Int, ::Type{<:Abstrac
144153
end
145154
function create_mrefs!(
146155
ls::LoopSet, arf::Vector{ArrayRefStruct}, as::Vector{Symbol}, os::Vector{Symbol},
147-
nopsv::Vector{Int}, expanded::Vector{Bool}, vargs
156+
nopsv::Vector{NOpsType}, expanded::Vector{Bool}, vargs
148157
)
149158
mrefs = Vector{ArrayReferenceMeta}(undef, length(arf))
150159
for i eachindex(arf)
@@ -230,14 +239,11 @@ function calcnops(ls::LoopSet, os::OperationStruct)
230239
offsets = ls.loopsymbol_offsets
231240
idxs = loopindex(ls, os.loopdeps, 0x04) # FIXME DRY
232241
iszero(length(idxs)) && return 1
233-
Δidxs = map(i->offsets[i+1]-offsets[i], idxs)
234-
nops = first(Δidxs)
235-
@assert all(isequal(nops), Δidxs)
236-
nops
242+
return map(i->offsets[i+1]-offsets[i], idxs)
237243
end
238-
function isexpanded(ls::LoopSet, ops::Vector{OperationStruct}, nopsv::Vector{Int}, i::Int)
244+
function isexpanded(ls::LoopSet, ops::Vector{OperationStruct}, nopsv::Vector{NOpsType}, i::Int)
239245
nops = nopsv[i]
240-
isone(nops) && return false
246+
(nops === 1 || nops == [1]) && return false
241247
os = ops[i]
242248
optyp = optype(os)
243249
if optyp == compute
@@ -250,7 +256,7 @@ function isexpanded(ls::LoopSet, ops::Vector{OperationStruct}, nopsv::Vector{Int
250256
end
251257

252258
function add_op!(
253-
ls::LoopSet, instr::Instruction, ops::Vector{OperationStruct}, nopsv::Vector{Int}, expandedv::Vector{Bool}, i::Int,
259+
ls::LoopSet, instr::Instruction, ops::Vector{OperationStruct}, nopsv::Vector{NOpsType}, expandedv::Vector{Bool}, i::Int,
254260
mrefs::Vector{ArrayReferenceMeta}, opsymbol, elementbytes::Int
255261
)
256262
os = ops[i]
@@ -272,9 +278,15 @@ function add_op!(
272278
push!(opoffsets, opoffsets[end] + 1)
273279
return
274280
end
281+
if isa(nops, Vector)
282+
n = first(nops)
283+
if all(isequal(n), nops)
284+
nops = n
285+
end
286+
end
275287
# if expanded, optyp must be either loopvalue, or compute (with loopvalues in its ancestry, not cutoff by loads)
276288
for offset = 0:nops-1
277-
sym = nops == 1 ? opsymbol : expandedopname(opsymbol, offset)
289+
sym = nops === 1 ? opsymbol : expandedopname(opsymbol, offset)
278290
op = Operation(
279291
length(operations(ls)), sym, elementbytes, instr,
280292
optyp, loopdependencies(ls, os, false, offset), reduceddependencies(ls, os, false, offset),
@@ -295,7 +307,7 @@ function add_parents_to_op!(ls::LoopSet, vparents::Vector{Operation}, up::Unsign
295307
for j offsets[i]+1:offsets[i+1] # if parents are expanded, add them all
296308
pushfirst!(vparents, ops[j])
297309
end
298-
end
310+
end
299311
else#if isexpanded
300312
# Do we want to require that all Δidxs are equal?
301313
# Because `CartesianIndex((2,3)) - 1` results in a methoderorr, I think this is reasonable for now
@@ -318,15 +330,15 @@ function add_parents_to_ops!(ls::LoopSet, ops::Vector{OperationStruct}, constoff
318330
pushpreamble!(ls, Expr(:(=), instr.instr, Expr(:macrocall, Symbol("@inbounds"), LineNumberNode(@__LINE__, Symbol(@__FILE__)), Expr(:ref, :vargs, constoffset))))
319331
end
320332
elseif !isloopvalue(op)
321-
add_parents_to_op!(ls, parents(op), ops[i].parents, k, Δ)
333+
add_parents_to_op!(ls, parents(op), ops[i].parents, k, Δ)
322334
end
323335
end
324336
end
325337
constoffset
326338
end
327339
function add_ops!(
328340
ls::LoopSet, instr::Vector{Instruction}, ops::Vector{OperationStruct}, mrefs::Vector{ArrayReferenceMeta},
329-
opsymbols::Vector{Symbol}, constoffset::Int, nopsv::Vector{Int}, expandedv::Vector{Bool}, elementbytes::Int
341+
opsymbols::Vector{Symbol}, constoffset::Int, nopsv::Vector{NOpsType}, expandedv::Vector{Bool}, elementbytes::Int
330342
)
331343
# @show ls.loopsymbols ls.loopsymbol_offsets
332344
for i eachindex(ops)
@@ -378,7 +390,7 @@ function avx_loopset(instr, ops, arf, AM, LPSYM, LB, vargs)
378390
resize!(ls.loop_order, ls.loopsymbol_offsets[end])
379391
arraysymbolinds = gen_array_syminds(AM)
380392
opsymbols = [gensym(:op) for _ eachindex(ops)]
381-
nopsv = calcnops.(Ref(ls), ops)
393+
nopsv = NOpsType[calcnops(ls, op) for op in ops]
382394
expandedv = [isexpanded(ls, ops, nopsv, i) for i eachindex(ops)]
383395
mrefs = create_mrefs!(ls, arf, arraysymbolinds, opsymbols, nopsv, expandedv, vargs)
384396
pushpreamble!(ls, Expr(:(=), ls.T, Expr(:call, :promote_type, [Expr(:call, :eltype, vptr(mref)) for mref mrefs]...)))
@@ -417,6 +429,3 @@ end
417429
ls = _avx_loopset(OPS.parameters, ARF.parameters, AM.parameters, LPSYM.parameters, LB.parameters, vargs)
418430
avx_body(ls, UT)
419431
end
420-
421-
422-

test/miscellaneous.jl

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1+
using LoopVectorization
2+
using LinearAlgebra
3+
using Test
14

25
@testset "Miscellaneous" begin
6+
37
Unum, Tnum = LoopVectorization.VectorizationBase.REGISTER_COUNT == 16 ? (3, 4) : (4, 6)
48
dot3q = :(for m 1:M, n 1:N
59
s += x[m] * A[m,n] * y[n]
@@ -48,7 +52,7 @@
4852
end
4953
s
5054
end
51-
55+
5256
subcolq = :(for i 1:size(A,2), j eachindex(x)
5357
B[j,i] = A[j,i] - x[j]
5458
end)
@@ -220,10 +224,10 @@
220224
# t = β₁ = β₂ = ρ = s = 0.0; weights = rand(1); nodes = rand(1); lomnibus(args...) = +(args...)
221225
# LoopVectorization.@avx_debug for i ∈ eachindex(weights, nodes)
222226
# s += weights[i] * lomnibus(nodes[i], t, β₁, β₂, ρ)
223-
# end
227+
# end
224228
# @macroexpand @avx for i ∈ eachindex(weights, nodes)
225229
# s += weights[i] * lomnibus(nodes[i], t, β₁, β₂, ρ)
226-
# end
230+
# end
227231
function softmax3_core!(lse, qq, xx, tmpmax, maxk, nk)
228232
for k in Base.OneTo(maxk)
229233
@inbounds for i in eachindex(lse)
@@ -486,7 +490,7 @@
486490
out[i] = 1 + i
487491
end
488492
end
489-
493+
490494
for T (Float32, Float64)
491495
@show T, @__LINE__
492496
A = randn(T, 199, 498);
@@ -550,7 +554,7 @@
550554
@test view(vec(B1), r) == view(vec(B2), r)
551555
fill!(B2, NaN); test_for_with_different_index_avx!(B2, A, C, start_sample, num_samples)
552556
@test view(vec(B1), r) == view(vec(B2), r)
553-
557+
554558
ni, nj, nk = (127, 113, 13)
555559
x = rand(T, ni, nj, nk);
556560
q1 = similar(x);
@@ -652,4 +656,54 @@
652656
one_plus_i_avx!(out2)
653657
@test out1 == out2
654658
end
659+
660+
@testset "Mixed CartesianIndex/Int indexing" begin
661+
# A demo similar to the exponential filtering demo from https://julialang.org/blog/2016/02/iteration/,
662+
# but with no loop-carried dependency.
663+
function smoothdim!(s, x, α, Rpre, irng::AbstractUnitRange, Rpost)
664+
ifirst, ilast = first(irng), last(irng)
665+
ifirst > ilast && return s
666+
for Ipost in Rpost
667+
# Initialize the first value along the filtered dimension
668+
for Ipre in Rpre
669+
s[Ipre, ifirst, Ipost] = x[Ipre, ifirst, Ipost]
670+
end
671+
# Handle all other entries
672+
for i = ifirst+1:ilast
673+
for Ipre in Rpre
674+
s[Ipre, i, Ipost] = α*x[Ipre, i, Ipost] + (1-α)*x[Ipre, i-1, Ipost]
675+
end
676+
end
677+
end
678+
s
679+
end
680+
function smoothdim_avx!(s, x, α, Rpre, irng::AbstractUnitRange, Rpost)
681+
ifirst, ilast = first(irng), last(irng)
682+
ifirst > ilast && return s
683+
@avx for Ipost in Rpost
684+
# Initialize the first value along the filtered dimension
685+
for Ipre in Rpre
686+
s[Ipre, ifirst, Ipost] = x[Ipre, ifirst, Ipost]
687+
end
688+
# Handle all other entries
689+
for i = ifirst+1:ilast
690+
for Ipre in Rpre
691+
s[Ipre, i, Ipost] = α*x[Ipre, i, Ipost] + (1-α)*x[Ipre, i-1, Ipost]
692+
end
693+
end
694+
end
695+
s
696+
end
697+
698+
x = rand(11,11,11) # ,11,11)
699+
dest1, dest2 = similar(x), similar(x)
700+
α = 0.3
701+
for d = 1:ndims(x)
702+
Rpre = CartesianIndices(axes(x)[1:d-1])
703+
Rpost = CartesianIndices(axes(x)[d+1:end])
704+
smoothdim!(dest1, x, α, Rpre, axes(x, d), Rpost)
705+
smoothdim_avx!(dest2, x, α, Rpre, axes(x, d), Rpost)
706+
@test dest1 dest2
707+
end
708+
end
655709
end

0 commit comments

Comments
 (0)