Skip to content

Commit 550a693

Browse files
committed
implement nditeration
1 parent 7fac139 commit 550a693

File tree

4 files changed

+214
-21
lines changed

4 files changed

+214
-21
lines changed

src/KernelAbstractions.jl

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -167,31 +167,14 @@ struct CUDA <: GPU end
167167
# struct AMD <: GPU end
168168
# struct Intel <: GPU end
169169

170+
include("nditeration.jl")
171+
using .NDIteration
172+
import .NDIteration: get
173+
170174
###
171175
# Kernel closure struct
172176
###
173177

174-
import Base.@pure
175-
176-
abstract type _Size end
177-
struct DynamicSize <: _Size end
178-
struct StaticSize{S} <: _Size
179-
function StaticSize{S}() where S
180-
new{S::Tuple{Vararg{Int}}}()
181-
end
182-
end
183-
184-
@pure StaticSize(s::Tuple{Vararg{Int}}) = StaticSize{s}()
185-
@pure StaticSize(s::Int...) = StaticSize{s}()
186-
@pure StaticSize(s::Type{<:Tuple}) = StaticSize{tuple(s.parameters...)}()
187-
188-
# Some @pure convenience functions for `StaticSize`
189-
@pure get(::Type{StaticSize{S}}) where {S} = S
190-
@pure get(::StaticSize{S}) where {S} = S
191-
@pure Base.getindex(::StaticSize{S}, i::Int) where {S} = i <= length(S) ? S[i] : 1
192-
@pure Base.ndims(::StaticSize{S}) where {S} = length(S)
193-
@pure Base.length(::StaticSize{S}) where {S} = prod(S)
194-
195178
"""
196179
Kernel{Device, WorkgroupSize, NDRange, Func}
197180

src/nditeration.jl

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
module NDIteration

export _Size, StaticSize, DynamicSize, get
export NDRange, blocks, workitems, expand

import Base.@pure

"""
    _Size

Abstract supertype of the size tags `DynamicSize` and `StaticSize`.
"""
abstract type _Size end

"""
    DynamicSize

Size tag carrying no extent in its type.
"""
struct DynamicSize <: _Size end

"""
    StaticSize{S}

Size tag whose extent `S` is a type parameter. The inner constructor asserts
that `S` is a tuple of `Int`s.
"""
struct StaticSize{S} <: _Size
    function StaticSize{S}() where S
        new{S::Tuple{Vararg{Int}}}()
    end
end

# Convenience constructors: from a tuple, from splatted `Int`s, or from a `Tuple` type
# whose parameters are the extents.
@pure StaticSize(s::Tuple{Vararg{Int}}) = StaticSize{s}()
@pure StaticSize(s::Int...) = StaticSize{s}()
@pure StaticSize(s::Type{<:Tuple}) = StaticSize{tuple(s.parameters...)}()

# Some @pure convenience functions for `StaticSize`
# `get` recovers the extent tuple `S` from either the type or an instance.
@pure get(::Type{StaticSize{S}}) where {S} = S
@pure get(::StaticSize{S}) where {S} = S
# Indexing past the number of stored dimensions yields 1 rather than an error.
@pure Base.getindex(::StaticSize{S}, i::Int) where {S} = i <= length(S) ? S[i] : 1
@pure Base.ndims(::StaticSize{S}) where {S} = length(S)
@pure Base.length(::StaticSize{S}) where {S} = prod(S)


"""
    NDRange{N, StaticBlocks, StaticWorkitems, DynamicBlock, DynamicWorkitems}

Encodes a blocked iteration space.

`StaticBlocks` and `StaticWorkitems` are size tags: with a `StaticSize{S}` tag the
corresponding extent is taken from `S`; with `DynamicSize` it is read from the
`blocks` / `workitems` field, which must then be a `CartesianIndices{N}`.
`NDRange{N, B, W}()` stores `nothing` in both fields.

Iterating an `NDRange` iterates its blocks, and `length` is the number of blocks.

# Example
```
ndrange = NDRange{2, DynamicSize, DynamicSize}(CartesianIndices((256, 256)), CartesianIndices((32, 32)))
for block in ndrange
    for items in workitems(ndrange)
        I = expand(ndrange, block, items)
        checkbounds(Bool, A, I) || continue
        @inbounds A[I] = 2*A[I]
    end
end
```
"""
struct NDRange{N, StaticBlocks, StaticWorkitems, DynamicBlock, DynamicWorkitems}
    blocks::DynamicBlock
    workitems::DynamicWorkitems

    function NDRange{N, B, W}() where {N, B, W}
        new{N, B, W, Nothing, Nothing}(nothing, nothing)
    end

    function NDRange{N, B, W}(blocks, workitems) where {N, B, W}
        new{N, B, W, typeof(blocks), typeof(workitems)}(blocks, workitems)
    end
end

"""
    workitems(range::NDRange)

Return the `CartesianIndices{N}` of work-items inside one block: the stored field
for a `DynamicSize` tag, or one built from the `StaticSize` parameter.
"""
@inline workitems(range::NDRange{N, B, W}) where {N,B,W<:DynamicSize} = range.workitems::CartesianIndices{N}
@inline workitems(range::NDRange{N, B, W}) where {N,B,W<:StaticSize} = CartesianIndices(get(W))::CartesianIndices{N}

"""
    blocks(range::NDRange)

Return the `CartesianIndices{N}` of blocks: the stored field for a `DynamicSize`
tag, or one built from the `StaticSize` parameter.
"""
@inline blocks(range::NDRange{N, B}) where {N,B<:DynamicSize} = range.blocks::CartesianIndices{N}
@inline blocks(range::NDRange{N, B}) where {N,B<:StaticSize} = CartesianIndices(get(B))::CartesianIndices{N}

# Iteration and `length` are forwarded to the block indices.
import Base.iterate
@inline iterate(range::NDRange) = iterate(blocks(range))
@inline iterate(range::NDRange, state) = iterate(blocks(range), state)

Base.length(range::NDRange) = length(blocks(range))

"""
    expand(ndrange, groupidx, idx)

Combine a block index and a work-item index into one `CartesianIndex{N}`; in each
dimension the result is `(groupidx - 1) * size(workitems(ndrange), dim) + idx`.

Either index may be an `Integer`, in which case it is first used to index
`blocks(ndrange)` / `workitems(ndrange)` to obtain the `CartesianIndex`.
"""
@inline function expand(ndrange::NDRange{N}, groupidx::CartesianIndex{N}, idx::CartesianIndex{N}) where N
    nI = ntuple(Val(N)) do I
        Base.@_inline_meta
        stride = size(workitems(ndrange), I)
        gidx = groupidx.I[I]
        (gidx-1)*stride + idx.I[I]
    end
    CartesianIndex(nI)
end

Base.@propagate_inbounds function expand(ndrange::NDRange, groupidx::Integer, idx::Integer)
    expand(ndrange, blocks(ndrange)[groupidx], workitems(ndrange)[idx])
end

Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::CartesianIndex{N}, idx::Integer) where N
    expand(ndrange, groupidx, workitems(ndrange)[idx])
end

Base.@propagate_inbounds function expand(ndrange::NDRange{N}, groupidx::Integer, idx::CartesianIndex{N}) where N
    expand(ndrange, blocks(ndrange)[groupidx], idx)
end

"""
    partition(ndrange, workgroupsize)

Splits the maximum size of the iteration space by the workgroupsize.

Returns a 3-tuple `(blocks, workgroupsize, dynamic)`:
- `blocks`: per-dimension number of workgroups, `fld1(ndrange[I], workgroupsize[I])`;
- `workgroupsize`: the workgroup size actually used. When fewer entries than
  `length(ndrange)` were given it is a tuple padded with trailing ones, otherwise
  the argument is returned unchanged;
- `dynamic`: `true` when some dimension of `ndrange` is not an exact multiple of
  the workgroup size, i.e. dynamic bounds-checking is needed.
"""
@inline function partition(ndrange, workgroupsize)
    @assert length(workgroupsize) <= length(ndrange)

    # Bind the (possibly padded) size to a fresh single-assignment local instead of
    # reassigning the argument: a variable that is both reassigned and captured by
    # a closure gets boxed, which defeats type inference.
    padded = if length(workgroupsize) < length(ndrange)
        # pad workgroupsize with ones
        ntuple(length(ndrange)) do I
            I > length(workgroupsize) ? 1 : workgroupsize[I]
        end
    else
        workgroupsize
    end

    nblocks = ntuple(length(ndrange)) do I
        fld1(ndrange[I], padded[I])
    end

    # Computed outside the closure above for the same reason: no captured mutable flag.
    dynamic = any(I -> mod(ndrange[I], padded[I]) != 0, 1:length(ndrange))

    return nblocks, padded, dynamic
end

end #module

test/nditeration.jl

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
using KernelAbstractions
2+
using KernelAbstractions.NDIteration
3+
using Test
4+
5+
@testset "iteration" begin
    # Iterating an NDRange must visit the block indices in order, whether the
    # block extent is stored in the instance or encoded in the type.
    expected = CartesianIndices((256, 256))
    items = CartesianIndices((32, 32))
    candidates = (
        NDRange{2, DynamicSize, DynamicSize}(CartesianIndices((256, 256)), items),
        NDRange{2, StaticSize{(256,256)}, DynamicSize}(nothing, items),
    )
    for ndrange in candidates
        @test length(ndrange) == 256*256
        @test all(pair -> pair[1] == pair[2], zip(ndrange, expected))
    end
end
15+
16+
# GPU scenario where we get a linear index into workitems/blocks
# Returns a vector holding `expand(ndrange, b, w)` for every linear block index `b`
# and linear work-item index `w`, stored block after block.
function linear_iteration(ndrange)
    nblocks = length(blocks(ndrange))
    nitems = length(workitems(ndrange))
    expanded = Array{CartesianIndex{2}}(undef, nblocks * nitems)
    for b in 1:nblocks, w in 1:nitems
        expanded[w + (b - 1) * nitems] = expand(ndrange, b, w)
    end
    return expanded
end
27+
28+
"""
    check(idx, offset, offset_x, offset_y, Dim_x, Dim_y)

Return `true` when the `Dim_x * Dim_y` entries of `idx` belonging to the block with
zero-based linear position `offset` equal, in order, the Cartesian indices of the
tile whose zero-based block coordinates are `(offset_x, offset_y)`.
"""
function check(idx, offset, offset_x, offset_y, Dim_x, Dim_y)
    N = Dim_x * Dim_y
    # The ranges must be parenthesised: `a .+ 1:N` parses as `(a .+ 1):N`, which is
    # empty for every `a >= N` and would make the comparison below vacuously true.
    linear = offset * N .+ (1:N)
    tile = CartesianIndices((offset_x * Dim_x .+ (1:Dim_x), offset_y * Dim_y .+ (1:Dim_y)))
    return all(p -> p[1] == p[2], zip(idx[linear], tile))
end
32+
33+
@testset "linear_iteration" begin
    # Each case is (block grid, (Dim_x, Dim_y)): a square workgroup, then a
    # workgroup that is flat in y, then one that is flat in x.
    cases = (
        ((4, 4), (32, 32)),
        ((4, 4*32), (32, 1)),
        ((4*32, 4), (1, 32)),
    )
    for (grid, (Dim_x, Dim_y)) in cases
        # Exercise both the type-encoded and the instance-stored extents.
        static = NDRange{2, StaticSize{grid}, StaticSize{(Dim_x,Dim_y)}}()
        dynamic = NDRange{2, DynamicSize, DynamicSize}(CartesianIndices(grid), CartesianIndices((Dim_x, Dim_y)))
        for ndrange in (static, dynamic)
            idx = linear_iteration(ndrange)
            for (i, I) in zip(1:length(blocks(ndrange)), blocks(ndrange))
                I = Tuple(I)
                @test check(idx, i-1, ntuple(i->I[i]-1, length(I))..., Dim_x, Dim_y)
            end
        end
    end
end

test/runtests.jl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,8 @@ end
1313
include("unroll.jl")
1414
end
1515

16+
@testset "NDIteration" begin
17+
include("nditeration.jl")
18+
end
19+
1620
include("examples.jl")

0 commit comments

Comments
 (0)