@@ -54,19 +54,28 @@ choose_one_grid_workitem(void)
54
54
__builtin_amdgcn_workitem_id_z () | __builtin_amdgcn_workgroup_id_z ()) == 0 ;
55
55
}
56
56
57
// Review note: this hunk turns the old blocking single_grid_sync body (the
// lines marked with a minus) into single_grid_arrive (the lines marked with
// a plus), which no longer waits.
//
// single_grid_arrive: performs one AA on s->w0 with an addend of 1 and
// returns the upper bits (v & ~0xffff) of the value that call produced.
// AA is defined outside this view; presumably an atomic fetch-add that
// returns the prior value -- TODO confirm.
//   s       - sync record whose w0 word is updated
//   members - number of participants expected to arrive
// Returns the value that single_grid_sync hands to single_grid_wait.
- static inline void
58
- single_grid_sync (__global struct mg_sync * s , uint members )
57
+ static inline uint
58
+ single_grid_arrive (__global struct mg_sync * s , uint members )
59
59
{
60
60
// Assumes 65535 or fewer workgroups in the grid
61
61
uint v = AA (& s -> w0 , 1U , memory_scope_device );
// The low 16 bits of v are compared against members - 1. On a match a
// second AA adds 0x10000 - members. If AA returned the pre-increment value,
// that addition brings the low 16 bits back to zero and carries one into
// bit 16, changing the upper bits that waiters compare against -- confirm
// against the AA definition.
62
- if ((v & 0xffff ) == members - 1 ) {
62
+ if ((v & 0xffff ) == members - 1 )
63
63
AA (& s -> w0 , 0x10000 - members , memory_scope_device );
// Removed lines below: the old code waited here in the else branch with a
// do/while loop, so a waiter always slept at least once before re-reading
// w0. That wait now lives in single_grid_wait.
64
- } else {
65
- v &= ~0xffff ;
66
- do {
67
- __builtin_amdgcn_s_sleep (1 );
68
- } while ((AL (& s -> w0 , memory_scope_device ) & ~0xffff ) == v );
69
- }
// Every caller, including the one that took the branch above, gets the
// upper bits as observed by its own first AA.
64
+ return v & ~0xffff ;
65
+ }
66
+
67
// single_grid_wait: loops for as long as the upper bits (mask ~0xffff) of
// s->w0, read through AL, still equal t.
// AL is defined outside this view; presumably an atomic load -- TODO confirm.
//   s - sync record whose w0 word is polled
//   t - value to compare against; single_grid_sync passes the result of
//       single_grid_arrive here
+ static inline void
68
+ single_grid_wait (__global struct mg_sync * s , uint t )
69
+ {
// The condition is tested before the first sleep, so a caller whose t is
// already stale returns without calling __builtin_amdgcn_s_sleep at all.
70
+ while ((AL (& s -> w0 , memory_scope_device ) & ~0xffff ) == t )
71
+ __builtin_amdgcn_s_sleep (1 );
72
+ }
73
+
74
+
75
// single_grid_sync: same signature as before the split. It now calls
// single_grid_arrive and passes the returned value straight to
// single_grid_wait.
// NOTE(review): unlike the removed code, the caller that matches
// members - 1 inside single_grid_arrive also enters single_grid_wait.
// It presumably leaves on the first check because its own second AA already
// changed the upper bits -- confirm against the AA and AL definitions.
+ static inline void
76
+ single_grid_sync (__global struct mg_sync * s , uint members )
77
+ {
78
+ single_grid_wait (s , single_grid_arrive (s , members ));
70
79
}
71
80
72
81
static inline void
@@ -102,6 +111,34 @@ __ockl_grid_is_valid(void)
102
111
return get_mg_info_arg () != 0UL ;
103
112
}
104
113
114
// __ockl_grid_bar_arrive: first half of a split grid barrier.
// Every workitem issues a workgroup-scope release fence and then
// __builtin_amdgcn_s_barrier. After that, only the workitem for which
// choose_one_workgroup_workitem() is true calls single_grid_arrive.
// Returns the single_grid_arrive result in that one workitem and 0 in all
// others. A real result can presumably also be 0, so callers cannot use 0
// to tell the two cases apart.
// NOTE(review): the get_mg_info_arg() result is dereferenced without a zero
// check here; __ockl_grid_is_valid tests it against 0UL, so callers
// presumably have to check validity first -- confirm.
+ uint
115
+ __ockl_grid_bar_arrive (void )
116
+ {
117
+ __builtin_amdgcn_fence (__ATOMIC_RELEASE , "workgroup" );
118
+ __builtin_amdgcn_s_barrier ();
119
+ uint ret = 0 ;
120
+ if (choose_one_workgroup_workitem ()) {
// Acquire at workgroup scope, then release at agent scope, before the
// arrival is recorded on mi->sgs. The order of these two fences and the
// single_grid_arrive call is significant; do not reorder.
121
+ __builtin_amdgcn_fence (__ATOMIC_ACQUIRE , "workgroup" );
122
+ __builtin_amdgcn_fence (__ATOMIC_RELEASE , "agent" );
123
+ __global struct mg_info * mi = (__global struct mg_info * )get_mg_info_arg ();
// mi->num_wg is passed as the members count.
124
+ ret = single_grid_arrive (& mi -> sgs , mi -> num_wg );
125
+ }
126
+ return ret ;
127
+ }
128
+
129
// __ockl_grid_bar_wait: second half of the split grid barrier.
//   t - value to wait on; presumably the result of __ockl_grid_bar_arrive
//       in the same workitem -- confirm against callers.
// Only the workitem for which choose_one_workgroup_workitem() is true reads
// t and polls mi->sgs through single_grid_wait. All other workitems go
// straight to the barrier below, so their t is unused.
// NOTE(review): the get_mg_info_arg() result is dereferenced without a zero
// check here as well -- confirm callers validate first.
+ void
130
+ __ockl_grid_bar_wait (uint t )
131
+ {
132
+ if (choose_one_workgroup_workitem ()) {
133
+ __global struct mg_info * mi = (__global struct mg_info * )get_mg_info_arg ();
134
+ single_grid_wait (& mi -> sgs , t );
// After the wait returns: acquire at agent scope, then release at
// workgroup scope, ahead of the barrier shared with the other workitems.
135
+ __builtin_amdgcn_fence (__ATOMIC_ACQUIRE , "agent" );
136
+ __builtin_amdgcn_fence (__ATOMIC_RELEASE , "workgroup" );
137
+ }
// Every workitem executes the barrier and the trailing workgroup-scope
// acquire fence, whether or not it did the wait above.
138
+ __builtin_amdgcn_s_barrier ();
139
+ __builtin_amdgcn_fence (__ATOMIC_ACQUIRE , "workgroup" );
140
+ }
141
+
105
142
void
106
143
__ockl_grid_sync (void )
107
144
{
0 commit comments