Skip to content

Commit da1d96d

Browse files
committed
don't export KI and fix POCL.synchronize
1 parent b0013f1 commit da1d96d

File tree

6 files changed

+21
-10
lines changed

6 files changed

+21
-10
lines changed

examples/histogram.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# INCLUDE ROCM
22
using KernelAbstractions, Test
33
using KernelAbstractions: @atomic, @atomicswap, @atomicreplace
4+
import KernelAbstractions.KernelIntrinsics as KI
5+
46
include(joinpath(dirname(pathof(KernelAbstractions)), "../examples/utils.jl")) # Load backend
57

68
# Function to use as a baseline for CPU metrics

examples/performant_matmul.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
using KernelAbstractions
2+
import KernelAbstractions.KernelIntrinsics as KI
3+
24
using StaticArrays
35
using Test
46
using Random
7+
58
include(joinpath(dirname(pathof(KernelAbstractions)), "../examples/utils.jl")) # Load backend
69

710
# We use a TILE_DIM of 16 as a safe value since while

src/KernelAbstractions.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,8 +200,8 @@ Abstract type for all KernelAbstractions backends.
200200
abstract type Backend end
201201

202202
include("intrinsics.jl")
203-
import .KernelIntrinsics: KernelIntrinsics, KI
204-
export KernelIntrinsics, KI
203+
import .KernelIntrinsics as KI
204+
export KernelIntrinsics
205205

206206
###
207207
# Kernel language

src/intrinsics.jl

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
2-
# `KernelIntrinics`/`KI`
2+
# `KernelIntrinsics`
33
4-
The `KernelIntrinics` (or `KI`) module defines the API interface for backends to define various lower-level device and
4+
The `KernelIntrinsics` (or `KI`) module defines the API that backends implement to provide various lower-level device and
55
host-side functionality. The `KI` intrinsics are used to define the higher-level device-side
66
intrinsics functionality in `KernelAbstractions`.
77
@@ -12,9 +12,6 @@ like allocating arrays on a backend.
1212
"""
1313
module KernelIntrinsics
1414

15-
const KI = KernelIntrinsics
16-
export KI
17-
1815
import ..KernelAbstractions: Backend
1916
import GPUCompiler: split_kwargs, assign_args!
2017

src/pocl/backend.jl

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,14 @@ KA.functional(::POCLBackend) = true
5757
KA.pagelock!(::POCLBackend, x) = nothing
5858

5959
KA.get_backend(::Array) = POCLBackend()
60-
KA.synchronize(::POCLBackend) = cl.finish(cl.queue())
60+
61+
## Implementation note:
62+
## The POCL backend uses `Base.Array` as its array type, so the external operations
63+
## `broadcast`, `*` and other high-level operations are handled by Julia. In order
64+
## to provide the same memory synchronization semantics as other backends, we
65+
## must synchronize upon kernel launch and can't rely on synchronization upon
66+
## array access. Therefore, `synchronize` is a no-op.
67+
KA.synchronize(::POCLBackend) = nothing
6168
KA.supports_float64(::POCLBackend) = true
6269
KA.supports_unified(::POCLBackend) = true
6370

@@ -149,9 +156,9 @@ end
149156
function (obj::KI.Kernel{POCLBackend})(args...; numworkgroups = 1, workgroupsize = 1)
150157
KI.check_launch_args(numworkgroups, workgroupsize)
151158

152-
local_size = (workgroupsize..., ntuple(_->1, 3-length(workgroupsize))...,)
159+
local_size = (workgroupsize..., ntuple(_ -> 1, 3 - length(workgroupsize))...)
153160

154-
numworkgroups = (numworkgroups..., ntuple(_->1, 3-length(numworkgroups))...,)
161+
numworkgroups = (numworkgroups..., ntuple(_ -> 1, 3 - length(numworkgroups))...)
155162
global_size = local_size .* numworkgroups
156163

157164
event = obj.kern(args...; local_size, global_size)

test/intrinsics.jl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import KernelAbstractions.KernelIntrinsics as KI
2+
13
function test_intrinsics_kernel(results)
24
# Test all intrinsics return NamedTuples with x, y, z fields
35
global_size = KI.get_global_size()

0 commit comments

Comments
 (0)