Skip to content

Commit 2990885

Browse files
committed
Add split grid barrier
1 parent 6fb8900 commit 2990885

File tree

1 file changed

+46
-9
lines changed
  • amd/device-libs/ockl/src

1 file changed

+46
-9
lines changed

amd/device-libs/ockl/src/cg.cl

Lines changed: 46 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -54,19 +54,28 @@ choose_one_grid_workitem(void)
5454
__builtin_amdgcn_workitem_id_z() | __builtin_amdgcn_workgroup_id_z()) == 0;
5555
}
5656

57-
static inline void
58-
single_grid_sync(__global struct mg_sync *s, uint members)
57+
// Record one arrival on the counter s->w0 and return the token that
// single_grid_wait() polls against: the value AA handed back for the first
// add, with its low 16 bits cleared.
// NOTE(review): AA is defined outside this view; it is used here as if it
// were an atomic add returning the pre-add value -- confirm.
static inline uint
58+
single_grid_arrive(__global struct mg_sync *s, uint members)
5959
{
6060
// Assumes 65535 or fewer workgroups in the grid
6161
uint v = AA(&s->w0, 1U, memory_scope_device);
62-
if ((v & 0xffff) == members-1) {
62+
// The low 16 bits act as the arrival count. When members-1 arrivals were
// already recorded, add 0x10000 - members: arithmetically this takes the
// `members` increments back out of the low half and adds one at bit 16,
// which changes the upper bits that single_grid_wait() compares.
if ((v & 0xffff) == members-1)
6363
AA(&s->w0, 0x10000 - members, memory_scope_device);
64-
} else {
65-
v &= ~0xffff;
66-
do {
67-
__builtin_amdgcn_s_sleep(1);
68-
} while ((AL(&s->w0, memory_scope_device) & ~0xffff) == v);
69-
}
64+
// Hand back only the bits above the low 16; this function no longer waits.
return v & ~0xffff;
65+
}
66+
67+
static inline void
68+
single_grid_wait(__global struct mg_sync *s, uint t)
69+
{
70+
while ((AL(&s->w0, memory_scope_device) & ~0xffff) == t)
71+
__builtin_amdgcn_s_sleep(1);
72+
}
73+
74+
75+
static inline void
76+
single_grid_sync(__global struct mg_sync *s, uint members)
77+
{
78+
single_grid_wait(s, single_grid_arrive(s, members));
7079
}
7180

7281
static inline void
@@ -102,6 +111,34 @@ __ockl_grid_is_valid(void)
102111
return get_mg_info_arg() != 0UL;
103112
}
104113

114+
uint
115+
__ockl_grid_bar_arrive(void)
116+
{
117+
__builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
118+
__builtin_amdgcn_s_barrier();
119+
uint ret = 0;
120+
if (choose_one_workgroup_workitem()) {
121+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
122+
__builtin_amdgcn_fence(__ATOMIC_RELEASE, "agent");
123+
__global struct mg_info *mi = (__global struct mg_info *)get_mg_info_arg();
124+
ret = single_grid_arrive(&mi->sgs, mi->num_wg);
125+
}
126+
return ret;
127+
}
128+
129+
// Wait half of the split grid barrier.
//
// t is expected to be the value returned by __ockl_grid_bar_arrive(); it is
// only read by the workitem picked by choose_one_workgroup_workitem(), so the
// value passed by the other workitems is ignored.
void
130+
__ockl_grid_bar_wait(uint t)
131+
{
132+
if (choose_one_workgroup_workitem()) {
133+
__global struct mg_info *mi = (__global struct mg_info *)get_mg_info_arg();
134+
// Poll until the upper bits of mi->sgs.w0 no longer equal t.
single_grid_wait(&mi->sgs, t);
135+
// Agent-scope acquire after the poll, then a workgroup-scope release
// ahead of the barrier below.
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "agent");
136+
__builtin_amdgcn_fence(__ATOMIC_RELEASE, "workgroup");
137+
}
138+
// Executed by every workitem, outside the conditional: the non-picked
// workitems reach this barrier directly, the picked one after polling.
__builtin_amdgcn_s_barrier();
139+
__builtin_amdgcn_fence(__ATOMIC_ACQUIRE, "workgroup");
140+
}
141+
105142
void
106143
__ockl_grid_sync(void)
107144
{

0 commit comments

Comments
 (0)