Skip to content

Commit da904eb

Browse files
committed
Forbid divergent execution of work-group barriers
1 parent 8a87f77 commit da904eb

File tree

3 files changed

+32
-11
lines changed

3 files changed

+32
-11
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "KernelAbstractions"
22
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
33
authors = ["Valentin Churavy <[email protected]> and contributors"]
4-
version = "0.9.33"
4+
version = "0.9.34"
55

66
[deps]
77
Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"

src/KernelAbstractions.jl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,9 @@ end
284284
After a `@synchronize` statement all reads and writes to global and local memory
285285
from each thread in the workgroup are visible from all other threads in the
286286
workgroup.
287+
288+
!!! note
289+
`@synchronize()` must be encountered by all workitems of a work-group executing the kernel or by none at all.
287290
"""
288291
macro synchronize()
289292
return quote
@@ -301,10 +304,15 @@ workgroup. `cond` is not allowed to have any visible sideffects.
301304
# Platform differences
302305
- `GPU`: This synchronization will only occur if `cond` evaluates to `true`.
303306
- `CPU`: This synchronization will always occur.
307+
308+
!!! warning
309+
This variant of the `@synchronize` macro violates the requirement that `@synchronize` must be encountered
310+
by all workitems of a work-group executing the kernel or by none at all.
311+
Since v0.9.34, this variant of the macro is deprecated and lowers to `@synchronize()`.
304312
"""
305313
macro synchronize(cond)
306314
return quote
307-
$(esc(cond)) && $__synchronize()
315+
$__synchronize()
308316
end
309317
end
310318

src/macros.jl

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -86,22 +86,35 @@ function transform_gpu!(def, constargs, force_inbounds)
8686
end
8787
end
8888
pushfirst!(def[:args], :__ctx__)
89-
body = def[:body]
89+
new_stmts = Expr[]
90+
body = MacroTools.flatten(def[:body])
91+
stmts = body.args
92+
push!(new_stmts, Expr(:aliasscope))
93+
push!(new_stmts, :(__active_lane__ = $__validindex(__ctx__)))
9094
if force_inbounds
91-
body = quote
92-
@inbounds $(body)
93-
end
95+
push!(new_stmts, Expr(:inbounds, true))
9496
end
95-
body = quote
96-
if $__validindex(__ctx__)
97-
$(body)
97+
98+
# fix convergence
99+
active_stmts = Any[]
100+
for stmt in stmts
101+
push!(active_stmts, stmt)
102+
has_sync = find_sync(stmt)
103+
if has_sync
104+
push!(new_stmts, Expr(:if, :__active_lane__, Expr(:block, active_stmts...)))
105+
empty!(active_stmts)
98106
end
99-
return nothing
100107
end
108+
push!(new_stmts, Expr(:if, :__active_lane__, Expr(:block, active_stmts...)))
109+
if force_inbounds
110+
push!(new_stmts, Expr(:inbounds, :pop))
111+
end
112+
push!(new_stmts, Expr(:popaliasscope))
113+
push!(new_stmts, :(return nothing))
101114
def[:body] = Expr(
102115
:let,
103116
Expr(:block, let_constargs...),
104-
body,
117+
Expr(:block, new_stmts...),
105118
)
106119
return
107120
end

0 commit comments

Comments
 (0)