Skip to content

Commit 3c73b57

Browse files
authored
Switch grid sync to nested scope approach (llvm#3192)
2 parents c724224 + d92d89d commit 3c73b57

File tree

1 file changed

+10
-1
lines changed
  • amd/device-libs/ockl/src

1 file changed

+10
-1
lines changed

amd/device-libs/ockl/src/cg.cl

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,18 +105,27 @@ __ockl_grid_is_valid(void)
105105
void
106106
__ockl_grid_sync(void)
107107
{
108-
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "agent");
108+
__builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
109109
__builtin_amdgcn_s_barrier();
110+
110111
if (choose_one_workgroup_workitem()) {
112+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
113+
__builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");
114+
111115
if (AVOID_GWS()) {
112116
__global struct mg_info *mi = (__global struct mg_info *)get_mg_info_arg();
113117
single_grid_sync(&mi->sgs, mi->num_wg);
114118
} else {
115119
uint nwm1 = (uint)__ockl_get_num_groups(0) * (uint)__ockl_get_num_groups(1) * (uint)__ockl_get_num_groups(2) - 1;
116120
__ockl_gws_barrier(nwm1, 0);
117121
}
122+
123+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
124+
__builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
118125
}
126+
119127
__builtin_amdgcn_s_barrier();
128+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
120129
}
121130

122131
__attribute__((const)) uint

0 commit comments

Comments
 (0)