Skip to content

Commit dc57cd0

Browse files
KernelIntrisics and related tweaks (#666)
* KernelIntrinsics Tweaks * Fix temporary AK compat * Improve KI tests
1 parent 03d4ab7 commit dc57cd0

File tree

4 files changed

+38
-37
lines changed

4 files changed

+38
-37
lines changed

.buildkite/pipeline.yml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,7 @@ steps:
109109
julia -e 'println("--- :julia: Developing oneAPI")
110110
using Pkg
111111
Pkg.add(url="https://github.com/christiangnrd/oneAPI.jl", rev="intrinsics")
112-
Pkg.develop(; name="AcceleratedKernels")'
113-
sed -i 's/^KernelAbstractions = "0\.9.*"/KernelAbstractions = "0.10"/' \${JULIA_DEPOT_PATH}/dev/AcceleratedKernels/Project.toml
112+
Pkg.add(url="https://github.com/christiangnrd/AcceleratedKernels.jl", rev="ka0.10simple")'
114113
julia -e 'println("--- :julia: Instantiating project")
115114
using Pkg
116115
Pkg.develop(; path=pwd())' || exit 3
@@ -141,8 +140,7 @@ steps:
141140
command: |
142141
julia -e 'println("--- :julia: Developing AMDGPU")
143142
using Pkg
144-
Pkg.develop(; name="AcceleratedKernels")'
145-
sed -i 's/^KernelAbstractions = "0\.9.*"/KernelAbstractions = "0.9, 0.10"/' \${JULIA_DEPOT_PATH}/dev/AcceleratedKernels/Project.toml
143+
Pkg.add(url="https://github.com/christiangnrd/AcceleratedKernels.jl", rev="ka0.10simple")'
146144
julia -e '
147145
using Pkg
148146
Pkg.add(url="https://github.com/christiangnrd/AMDGPU.jl", rev="intrinsics")

src/KernelAbstractions.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -833,7 +833,7 @@ include("macros.jl")
833833
###
834834

835835
function Scratchpad end
836-
SharedMemory(t::Type{T}, dims::Val{Dims}, id::Val{Id}) where {T, Dims, Id} = KI.localmemory(t, dims)
836+
SharedMemory(::Type{T}, dims::Val{Dims}, id::Val{Id}) where {T, Dims, Id} = KI.localmemory(T, dims)
837837

838838
__synchronize() = KI.barrier()
839839

src/intrinsics.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -103,14 +103,14 @@ Returns the unique group ID.
103103
function get_group_id end
104104

105105
"""
106-
localmemory(T, dims)
106+
localmemory(::Type{T}, dims)
107107
108108
Declare memory that is local to a workgroup.
109109
110110
!!! note
111111
Backend implementations **must** implement:
112112
```
113-
@device_override localmemory(T::DataType, ::Val{Dims}) where {T, Dims}
113+
@device_override localmemory(::Type{T}, ::Val{Dims}) where {T, Dims}
114114
```
115115
As well as the on-device functionality.
116116
"""

test/intrinsics.jl

Lines changed: 33 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,25 @@
11
import KernelAbstractions.KernelIntrinsics as KI
22

3+
struct KernelData
4+
global_size::Int
5+
global_id::Int
6+
local_size::Int
7+
local_id::Int
8+
num_groups::Int
9+
group_id::Int
10+
end
311
function test_intrinsics_kernel(results)
4-
# Test all intrinsics return NamedTuples with x, y, z fields
5-
global_size = KI.get_global_size()
6-
global_id = KI.get_global_id()
7-
local_size = KI.get_local_size()
8-
local_id = KI.get_local_id()
9-
num_groups = KI.get_num_groups()
10-
group_id = KI.get_group_id()
11-
12-
if UInt32(global_id.x) <= UInt32(global_size.x)
13-
results[1, global_id.x] = global_id.x
14-
results[2, global_id.x] = local_id.x
15-
results[3, global_id.x] = group_id.x
16-
results[4, global_id.x] = global_size.x
17-
results[5, global_id.x] = local_size.x
18-
results[6, global_id.x] = num_groups.x
12+
i = KI.get_global_id().x
13+
14+
if i <= length(results)
15+
@inbounds results[i] = KernelData(
16+
KI.get_global_size().x,
17+
KI.get_global_id().x,
18+
KI.get_local_size().x,
19+
KI.get_local_id().x,
20+
KI.get_num_groups().x,
21+
KI.get_group_id().x
22+
)
1923
end
2024
return
2125
end
@@ -82,41 +86,40 @@ function intrinsics_testsuite(backend, AT)
8286
@test KI.multiprocessor_count(backend()) isa Int
8387

8488
# Test with small kernel
85-
N = 16
86-
results = AT(zeros(Int, 6, N))
89+
workgroupsize = 4
90+
numworkgroups = 4
91+
N = workgroupsize * numworkgroups
92+
results = AT(Vector{KernelData}(undef, N))
8793
kernel = KI.@kernel backend() launch = false test_intrinsics_kernel(results)
8894

8995
@test KI.kernel_max_work_group_size(kernel) isa Int
9096
@test KI.kernel_max_work_group_size(kernel; max_work_items = 1) == 1
9197

92-
kernel(results, workgroupsize = 4, numworkgroups = 4)
98+
kernel(results; workgroupsize, numworkgroups)
9399
KernelAbstractions.synchronize(backend())
94100

95101
host_results = Array(results)
96102

97103
# Verify results make sense
98-
for i in 1:N
99-
global_id_x, local_id_x, group_id_x, global_size_x, local_size_x, num_groups_x = host_results[:, i]
104+
for (i, k_data) in enumerate(host_results)
100105

101106
# Global IDs should be 1-based and sequential
102-
@test global_id_x == i
107+
@test k_data.global_id == i
103108

104109
# Global size should match our ndrange
105-
@test global_size_x == N
110+
@test k_data.global_size == N
106111

107-
# Local size should be 4 (our workgroupsize)
108-
@test local_size_x == 4
112+
@test k_data.local_size == workgroupsize
109113

110-
# Number of groups should be ceil(N/4) = 4
111-
@test num_groups_x == 4
114+
@test k_data.num_groups == numworkgroups
112115

113116
# Group ID should be 1-based
114-
expected_group = div(i - 1, 4) + 1
115-
@test group_id_x == expected_group
117+
expected_group = div(i - 1, numworkgroups) + 1
118+
@test k_data.group_id == expected_group
116119

117120
# Local ID should be 1-based within group
118-
expected_local = ((i - 1) % 4) + 1
119-
@test local_id_x == expected_local
121+
expected_local = ((i - 1) % workgroupsize) + 1
122+
@test k_data.local_id == expected_local
120123
end
121124
end
122125
end

0 commit comments

Comments
 (0)