Skip to content

Commit ee76c60

Browse files
committed
Run Runic after explicit return rule addition
1 parent f0ea5b2 commit ee76c60

27 files changed

+88
-63
lines changed

docs/make.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ function main()
4444
push_preview = true,
4545
)
4646
end
47+
return
4748
end
4849

4950
isinteractive() || main()

examples/histogram.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,13 @@ function histogram!(histogram_output, input)
6262
backend = get_backend(histogram_output)
6363
# Need static block size
6464
kernel! = histogram_kernel!(backend, (256,))
65-
kernel!(histogram_output, input, ndrange = size(input))
65+
return kernel!(histogram_output, input, ndrange = size(input))
6666
end
6767

6868
function move(backend, input)
6969
# TODO replace with adapt(backend, input)
7070
out = KernelAbstractions.allocate(backend, eltype(input), size(input))
71-
KernelAbstractions.copyto!(backend, out, input)
71+
return KernelAbstractions.copyto!(backend, out, input)
7272
end
7373

7474
@testset "histogram tests" begin

examples/matmul.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ function matmul!(output, a, b)
2222
end
2323
backend = KernelAbstractions.get_backend(a)
2424
kernel! = matmul_kernel!(backend)
25-
kernel!(output, a, b, ndrange = size(output))
25+
return kernel!(output, a, b, ndrange = size(output))
2626
end
2727

2828
a = rand!(allocate(backend, Float32, 256, 123))

examples/memcopy.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ function mycopy!(A, B)
1212
@assert get_backend(B) == backend
1313

1414
kernel = copy_kernel!(backend)
15-
kernel(A, B, ndrange = length(A))
15+
return kernel(A, B, ndrange = length(A))
1616
end
1717

1818
A = KernelAbstractions.zeros(backend, Float64, 128, 128)

examples/memcopy_static.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ function mycopy_static!(A, B)
1212
@assert get_backend(B) == backend
1313

1414
kernel = copy_kernel!(backend, 32, size(A)) # if size(A) varies this will cause recompilation
15-
kernel(A, B, ndrange = size(A))
15+
return kernel(A, B, ndrange = size(A))
1616
end
1717

1818
A = KernelAbstractions.zeros(backend, Float64, 128, 128)

examples/mpi.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,15 @@ function cooperative_test!(req)
99
done, _ = MPI.Test(req, MPI.Status)
1010
yield()
1111
end
12+
return nothing
1213
end
1314

1415
function cooperative_wait(task::Task)
1516
while !Base.istaskdone(task)
1617
MPI.Iprobe(MPI.MPI_ANY_SOURCE, MPI.MPI_ANY_TAG, MPI.COMM_WORLD)
1718
yield()
1819
end
19-
wait(task)
20+
return wait(task)
2021
end
2122

2223
function exchange!(h_send_buf, d_recv_buf, h_recv_buf, src_rank, dst_rank, comm)
@@ -68,6 +69,7 @@ function main(backend)
6869
cooperative_wait(send_task)
6970

7071
@test all(d_recv_buf .== src_rank)
72+
return
7173
end
7274

7375
main(backend)

examples/naive_transpose.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ function naive_transpose!(a, b)
1717
@assert get_backend(b) == backend
1818
groupsize = KernelAbstractions.isgpu(backend) ? 256 : 1024
1919
kernel! = naive_transpose_kernel!(backend, groupsize)
20-
kernel!(a, b, ndrange = size(a))
20+
return kernel!(a, b, ndrange = size(a))
2121
end
2222

2323
# resolution of grid will be res*res

ext/EnzymeExt.jl

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ function EnzymeRules.forward(
6565
f = kernel.f
6666
fwd_kernel = similar(kernel, cpu_fwd)
6767

68-
fwd_kernel(f, args...; ndrange, workgroupsize)
68+
return fwd_kernel(f, args...; ndrange, workgroupsize)
6969
end
7070

7171
function EnzymeRules.forward(
@@ -79,7 +79,7 @@ function EnzymeRules.forward(
7979
f = kernel.f
8080
fwd_kernel = similar(kernel, gpu_fwd)
8181

82-
fwd_kernel(f, args...; ndrange, workgroupsize)
82+
return fwd_kernel(f, args...; ndrange, workgroupsize)
8383
end
8484

8585
_enzyme_mkcontext(kernel::Kernel{CPU}, ndrange, iterspace, dynamic) =
@@ -278,18 +278,18 @@ function EnzymeRules.augmented_primal(
278278
if func.val isa Kernel{<:GPU}
279279
error("Active kernel arguments not supported on GPU")
280280
else
281-
Ref(EnzymeCore.make_zero(args[i].val))
281+
return Ref(EnzymeCore.make_zero(args[i].val))
282282
end
283283
else
284-
nothing
284+
return nothing
285285
end
286286
end
287287
args2 = ntuple(Val(N)) do i
288288
Base.@_inline_meta
289289
if args[i] isa Active
290-
MixedDuplicated(args[i].val, arg_refs[i])
290+
return MixedDuplicated(args[i].val, arg_refs[i])
291291
else
292-
args[i]
292+
return args[i]
293293
end
294294
end
295295

@@ -324,9 +324,9 @@ function EnzymeRules.reverse(
324324
args2 = ntuple(Val(N)) do i
325325
Base.@_inline_meta
326326
if args[i] isa Active
327-
MixedDuplicated(args[i].val, arg_refs[i])
327+
return MixedDuplicated(args[i].val, arg_refs[i])
328328
else
329-
args[i]
329+
return args[i]
330330
end
331331
end
332332

@@ -348,9 +348,9 @@ function EnzymeRules.reverse(
348348
res = ntuple(Val(N)) do i
349349
Base.@_inline_meta
350350
if args[i] isa Active
351-
arg_refs[i][]
351+
return arg_refs[i][]
352352
else
353-
nothing
353+
return nothing
354354
end
355355
end
356356
# Reverse synchronization right after the kernel launch

src/KernelAbstractions.jl

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ synchronize(backend)
5151
```
5252
"""
5353
macro kernel(expr)
54-
__kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
54+
return __kernel(expr, #=generate_cpu=# true, #=force_inbounds=# false)
5555
end
5656

5757
"""
@@ -69,7 +69,7 @@ This allows for two different configurations:
6969
"""
7070
macro kernel(ex...)
7171
if length(ex) == 1
72-
__kernel(ex[1], true, false)
72+
return __kernel(ex[1], true, false)
7373
else
7474
generate_cpu = true
7575
force_inbounds = false
@@ -89,7 +89,7 @@ macro kernel(ex...)
8989
)
9090
end
9191
end
92-
__kernel(ex[end], generate_cpu, force_inbounds)
92+
return __kernel(ex[end], generate_cpu, force_inbounds)
9393
end
9494
end
9595

@@ -167,7 +167,7 @@ a tuple corresponding to kernel configuration. In order to get
167167
the total size you can use `prod(@groupsize())`.
168168
"""
169169
macro groupsize()
170-
quote
170+
return quote
171171
$groupsize($(esc(:__ctx__)))
172172
end
173173
end
@@ -179,7 +179,7 @@ Query the ndrange on the backend. This function returns
179179
a tuple corresponding to kernel configuration.
180180
"""
181181
macro ndrange()
182-
quote
182+
return quote
183183
$size($ndrange($(esc(:__ctx__))))
184184
end
185185
end
@@ -193,7 +193,7 @@ macro localmem(T, dims)
193193
# Stay in sync with CUDAnative
194194
id = gensym("static_shmem")
195195

196-
quote
196+
return quote
197197
$SharedMemory($(esc(T)), Val($(esc(dims))), Val($(QuoteNode(id))))
198198
end
199199
end
@@ -214,7 +214,7 @@ macro private(T, dims)
214214
if dims isa Integer
215215
dims = (dims,)
216216
end
217-
quote
217+
return quote
218218
$Scratchpad($(esc(:__ctx__)), $(esc(T)), Val($(esc(dims))))
219219
end
220220
end
@@ -226,7 +226,7 @@ Creates a private local of `mem` per item in the workgroup. This can be safely u
226226
across [`@synchronize`](@ref) statements.
227227
"""
228228
macro private(expr)
229-
esc(expr)
229+
return esc(expr)
230230
end
231231

232232
"""
@@ -236,7 +236,7 @@ end
236236
that span workitems, or are reused across `@synchronize` statements.
237237
"""
238238
macro uniform(value)
239-
esc(value)
239+
return esc(value)
240240
end
241241

242242
"""
@@ -247,7 +247,7 @@ from each thread in the workgroup are visible in from all other threads in the
247247
workgroup.
248248
"""
249249
macro synchronize()
250-
quote
250+
return quote
251251
$__synchronize()
252252
end
253253
end
@@ -264,7 +264,7 @@ workgroup. `cond` is not allowed to have any visible sideffects.
264264
- `CPU`: This synchronization will always occur.
265265
"""
266266
macro synchronize(cond)
267-
quote
267+
return quote
268268
$(esc(cond)) && $__synchronize()
269269
end
270270
end
@@ -289,7 +289,7 @@ end
289289
```
290290
"""
291291
macro context()
292-
esc(:(__ctx__))
292+
return esc(:(__ctx__))
293293
end
294294

295295
"""
@@ -329,7 +329,7 @@ macro print(items...)
329329
end
330330
end
331331

332-
quote
332+
return quote
333333
$__print($(map(esc, args)...))
334334
end
335335
end
@@ -385,7 +385,7 @@ macro index(locale, args...)
385385
end
386386

387387
index_function = Symbol(:__index_, locale, :_, indexkind)
388-
Expr(:call, GlobalRef(KernelAbstractions, index_function), esc(:__ctx__), map(esc, args)...)
388+
return Expr(:call, GlobalRef(KernelAbstractions, index_function), esc(:__ctx__), map(esc, args)...)
389389
end
390390

391391
###
@@ -591,7 +591,7 @@ struct Kernel{Backend, WorkgroupSize <: _Size, NDRange <: _Size, Fun}
591591
end
592592

593593
function Base.similar(kernel::Kernel{D, WS, ND}, f::F) where {D, WS, ND, F}
594-
Kernel{D, WS, ND, F}(kernel.backend, f)
594+
return Kernel{D, WS, ND, F}(kernel.backend, f)
595595
end
596596

597597
workgroupsize(::Kernel{D, WorkgroupSize}) where {D, WorkgroupSize} = WorkgroupSize
@@ -701,7 +701,7 @@ end
701701
push!(args, item)
702702
end
703703

704-
quote
704+
return quote
705705
print($(args...))
706706
end
707707
end

src/cpu.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ function (obj::Kernel{CPU})(args...; ndrange = nothing, workgroupsize = nothing)
4343
return nothing
4444
end
4545

46-
__run(obj, ndrange, iterspace, args, dynamic, obj.backend.static)
46+
return __run(obj, ndrange, iterspace, args, dynamic, obj.backend.static)
4747
end
4848

4949
const CPU_GRAINSIZE = 1024 # Vectorization, 4x unrolling, minimal grain size
@@ -162,15 +162,15 @@ end
162162

163163
@inline function __index_Global_Linear(ctx, idx::CartesianIndex)
164164
I = @inbounds expand(__iterspace(ctx), __groupindex(ctx), idx)
165-
@inbounds LinearIndices(__ndrange(ctx))[I]
165+
return @inbounds LinearIndices(__ndrange(ctx))[I]
166166
end
167167

168168
@inline function __index_Local_Cartesian(_, idx::CartesianIndex)
169169
return idx
170170
end
171171

172172
@inline function __index_Group_Cartesian(ctx, ::CartesianIndex)
173-
__groupindex(ctx)
173+
return __groupindex(ctx)
174174
end
175175

176176
@inline function __index_Global_Cartesian(ctx, idx::CartesianIndex)
@@ -191,7 +191,7 @@ end
191191
# CPU implementation of shared memory
192192
###
193193
@inline function SharedMemory(::Type{T}, ::Val{Dims}, ::Val) where {T, Dims}
194-
MArray{__size(Dims), T}(undef)
194+
return MArray{__size(Dims), T}(undef)
195195
end
196196

197197
###
@@ -212,7 +212,7 @@ end
212212
# https://github.com/JuliaLang/julia/issues/39308
213213
@inline function aview(A, I::Vararg{Any, N}) where {N}
214214
J = Base.to_indices(A, I)
215-
Base.unsafe_view(Base._maybe_reshape_parent(A, Base.index_ndims(J...)), J...)
215+
return Base.unsafe_view(Base._maybe_reshape_parent(A, Base.index_ndims(J...)), J...)
216216
end
217217

218218
@inline function Base.getindex(A::ScratchArray{N}, idx) where {N}

0 commit comments

Comments (0)