Skip to content

Commit 1252930

Browse files
committed
Implement queue
1 parent d6f478d commit 1252930

File tree

3 files changed

+60
-23
lines changed

3 files changed

+60
-23
lines changed

src/pocl/backend.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ function (obj::KA.Kernel{POCLBackend})(args...; ndrange=nothing, workgroupsize=n
110110
local_size = items
111111
kernel(ctx, args...; global_size, local_size)
112112

113+
cl.finish(cl.queue()) # TODO, would waiting on an event be cheaper?
113114
return nothing
114115
end
115116

src/pocl/nanoOpenCL.jl

Lines changed: 57 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,10 @@ const cl_kernel_work_group_info = cl_uint
476476

477477
const cl_kernel_sub_group_info = cl_uint
478478

479+
const cl_device_svm_capabilities = cl_bitfield
480+
481+
const cl_command_queue_properties = cl_bitfield
482+
479483
@checked function clGetPlatformIDs(num_entries, platforms, num_platforms)
480484
@ccall libopencl.clGetPlatformIDs(num_entries::cl_uint, platforms::Ptr{cl_platform_id},
481485
num_platforms::Ptr{cl_uint})::cl_int
@@ -589,6 +593,20 @@ end
589593
event::Ptr{cl_event})::cl_int
590594
end
591595

596+
function clCreateCommandQueue(context, device, properties, errcode_ret)
597+
@ccall libopencl.clCreateCommandQueue(context::cl_context, device::cl_device_id,
598+
properties::cl_command_queue_properties,
599+
errcode_ret::Ptr{cl_int})::cl_command_queue
600+
end
601+
602+
@checked function clReleaseCommandQueue(command_queue)
603+
@ccall libopencl.clReleaseCommandQueue(command_queue::cl_command_queue)::cl_int
604+
end
605+
606+
@checked function clFinish(command_queue)
607+
@ccall libopencl.clFinish(command_queue::cl_command_queue)::cl_int
608+
end
609+
592610
# Init
593611

594612
# lazy initialization
@@ -886,16 +904,6 @@ function Context(device::Device)
886904
return Context(ctx_id)
887905
end
888906

889-
mutable struct Event
890-
const id::cl_event
891-
892-
function Event(evt_id)
893-
evt = new(evt_id)
894-
finalizer(clReleaseEvent, evt)
895-
return evt
896-
end
897-
end
898-
899907
mutable struct Program
900908
const id::cl_program
901909

@@ -1138,7 +1146,7 @@ function set_args!(k::Kernel, args...)
11381146
end
11391147

11401148
function enqueue_kernel(k::Kernel, global_work_size, local_work_size=nothing;
1141-
global_work_offset=nothing, wait_on::Vector{Event}=Event[])
1149+
global_work_offset=nothing)
11421150
max_work_dim = device().max_work_item_dims
11431151
work_dim = length(global_work_size)
11441152
if work_dim > max_work_dim
@@ -1181,29 +1189,24 @@ function enqueue_kernel(k::Kernel, global_work_size, local_work_size=nothing;
11811189
# null local size means OpenCL decides
11821190
end
11831191

1184-
if !isempty(wait_on)
1185-
n_events = cl_uint(length(wait_on))
1186-
wait_event_ids = [evt.id for evt in wait_on]
1187-
else
1188-
n_events = cl_uint(0)
1189-
wait_event_ids = C_NULL
1190-
end
1192+
n_events = cl_uint(0)
1193+
wait_event_ids = C_NULL
1194+
ret_event = C_NULL
11911195

1192-
ret_event = Ref{cl_event}()
11931196
clEnqueueNDRangeKernel(queue(), k, cl_uint(work_dim), goffset, gsize, lsize,
11941197
n_events, wait_event_ids, ret_event)
1195-
return Event(ret_event[], retain=false)
1198+
return nothing
11961199
end
11971200

11981201
function call(k::Kernel, args...; global_size=(1,), local_size=nothing,
1199-
global_work_offset=nothing, wait_on::Vector{Event}=Event[],
1202+
global_work_offset=nothing,
12001203
svm_pointers::Vector{Ptr{Cvoid}}=Ptr{Cvoid}[])
12011204
set_args!(k, args...)
12021205
if !isempty(svm_pointers)
12031206
clSetKernelExecInfo(k, CL_KERNEL_EXEC_INFO_SVM_PTRS,
12041207
sizeof(svm_pointers), svm_pointers)
12051208
end
1206-
enqueue_kernel(k, global_size, local_size; global_work_offset, wait_on)
1209+
enqueue_kernel(k, global_size, local_size; global_work_offset)
12071210
end
12081211

12091212
# convert the argument values to match the kernel's signature (specified by the user)
@@ -1272,4 +1275,36 @@ function Base.getproperty(ki::KernelWorkGroupInfo, s::Symbol)
12721275
end
12731276
end
12741277

1278+
mutable struct CmdQueue
1279+
const id::cl_command_queue
1280+
1281+
function CmdQueue(q_id::cl_command_queue)
1282+
q = new(q_id)
1283+
finalizer(q) do _
1284+
clReleaseCommandQueue(q)
1285+
end
1286+
return q
1287+
end
1288+
end
1289+
1290+
Base.unsafe_convert(::Type{cl_command_queue}, q::CmdQueue) = q.id
1291+
1292+
function CmdQueue()
1293+
flags=cl_command_queue_properties(0)
1294+
err_code = Ref{Cint}()
1295+
queue_id = clCreateCommandQueue(context(), device(), flags, err_code)
1296+
if err_code[] != CL_SUCCESS
1297+
if queue_id != C_NULL
1298+
clReleaseCommandQueue(queue_id)
1299+
end
1300+
throw(CLError(err_code[]))
1301+
end
1302+
return CmdQueue(queue_id)
1303+
end
1304+
1305+
function finish(q::CmdQueue)
1306+
clFinish(q)
1307+
return q
1308+
end
1309+
12751310
end

src/pocl/pocl.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ function device()
2727
end::cl.Device
2828
end
2929

30+
# TODO: add a device context dict
3031
function context()
3132
get!(task_local_storage(), :POCLContext) do
3233
cl.Context(device())
@@ -36,7 +37,7 @@ end
3637
function queue()
3738
get!(task_local_storage(), :POCLQueue) do
3839
cl.CmdQueue()
39-
end::CmdQueue
40+
end::cl.CmdQueue
4041
end
4142

4243
using GPUCompiler

0 commit comments

Comments
 (0)