Skip to content

Commit cff2113

Browse files
vchuravy and mwarusz committed
[CUDAKernels] Add an implicit dependency to synchronize against task-local streams
Co-authored-by: Maciej Waruszewski <[email protected]>
1 parent eff982d commit cff2113

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

lib/CUDAKernels/src/CUDAKernels.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -186,7 +186,7 @@ function threads_to_workgroupsize(threads, ndrange)
186186
end
187187
end
188188

189-
function (obj::Kernel{CUDADevice})(args...; ndrange=nothing, dependencies=nothing, workgroupsize=nothing, progress=yield)
189+
function (obj::Kernel{CUDADevice})(args...; ndrange=nothing, dependencies=Event(CUDADevice()), workgroupsize=nothing, progress=yield)
190190

191191
ndrange, workgroupsize, iterspace, dynamic = launch_config(obj, ndrange, workgroupsize)
192192
# this might not be the final context, since we may tune the workgroupsize

test/test.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,7 @@ if backend != CPU
221221
event1 = kernel_empty(backend(), 1)(ndrange=1)
222222
event2 = kernel_empty(backend(), 1)(ndrange=0; dependencies=event1)
223223
@test event2 == MultiEvent(event1)
224-
event = kernel_empty(backend(), 1)(ndrange=0)
224+
event = kernel_empty(backend(), 1)(ndrange=0, dependencies=nothing)
225225
@test event == MultiEvent(nothing)
226226
end
227227
end

0 commit comments

Comments
 (0)