@@ -32,6 +32,30 @@ _Pragma("push_macro(\"bool\")");
3232#define bool _Bool
3333#endif
3434
35+ _Pragma ("omp begin declare target device_type(nohost)" );
36+ _Pragma ("omp begin declare variant match(device = {kind(gpu)})" );
37+
38+ // Forward declare the generic match helpers ahead of the target-specific implementation headers included below; their definitions appear later in this file.
39+
40+ // Returns a bitmask marking all lanes that have the same value of __x (32-bit value).
41+ _DEFAULT_FN_ATTRS static __inline__ uint64_t
42+ __gpu_match_any_u32_impl (uint64_t __lane_mask , uint32_t __x );
43+
44+ // Returns a bitmask marking all lanes that have the same value of __x (64-bit value).
45+ _DEFAULT_FN_ATTRS static __inline__ uint64_t
46+ __gpu_match_any_u64_impl (uint64_t __lane_mask , uint64_t __x );
47+
48+ // Returns the current lane mask if every lane contains the same 32-bit __x, and zero otherwise.
49+ _DEFAULT_FN_ATTRS static __inline__ uint64_t
50+ __gpu_match_all_u32_impl (uint64_t __lane_mask , uint32_t __x );
51+
52+ // Returns the current lane mask if every lane contains the same 64-bit __x, and zero otherwise.
53+ _DEFAULT_FN_ATTRS static __inline__ uint64_t
54+ __gpu_match_all_u64_impl (uint64_t __lane_mask , uint64_t __x );
55+
56+ _Pragma ("omp end declare variant" );
57+ _Pragma ("omp end declare target" );
58+
3559#if defined(__NVPTX__ )
3660#include <nvptxintrin.h>
3761#elif defined(__AMDGPU__ )
@@ -115,7 +139,7 @@ __gpu_is_first_in_lane(uint64_t __lane_mask) {
115139 return __gpu_lane_id () == __gpu_first_lane_id (__lane_mask );
116140}
117141
118- // Copies the value from the first active thread in the wavefront to the rest.
142+ // Copies the value from the first active thread to the rest.
119143_DEFAULT_FN_ATTRS static __inline__ uint64_t
120144__gpu_read_first_lane_u64 (uint64_t __lane_mask , uint64_t __x ) {
121145 uint32_t __hi = (uint32_t )(__x >> 32ull );
@@ -234,6 +258,62 @@ __DO_LANE_SUM(float, f32); // float __gpu_lane_sum_f32(m, x)
234258__DO_LANE_SUM (double , f64 ); // double __gpu_lane_sum_f64(m, x)
235259#undef __DO_LANE_SUM
236260
261+ // Returns a bitmask marking all lanes that have the same value of __x.
262+ _DEFAULT_FN_ATTRS static __inline__ uint64_t
263+ __gpu_match_any_u32_impl (uint64_t __lane_mask , uint32_t __x ) {
264+ uint32_t __match_mask = 0 ;
265+
266+ bool __done = 0 ;
267+ while (__gpu_ballot (__lane_mask , !__done )) {
268+ if (!__done ) {
269+ uint32_t __first = __gpu_read_first_lane_u32 (__lane_mask , __x );
270+ if (__first == __x ) {
271+ __match_mask = __gpu_lane_mask ();
272+ __done = 1 ;
273+ }
274+ }
275+ }
276+ __gpu_sync_lane (__lane_mask );
277+ return __match_mask ;
278+ }
279+
280+ // Returns a bitmask marking all lanes that have the same value of __x.
281+ _DEFAULT_FN_ATTRS static __inline__ uint64_t
282+ __gpu_match_any_u64_impl (uint64_t __lane_mask , uint64_t __x ) {
283+ uint64_t __match_mask = 0 ;
284+
285+ bool __done = 0 ;
286+ while (__gpu_ballot (__lane_mask , !__done )) {
287+ if (!__done ) {
288+ uint64_t __first = __gpu_read_first_lane_u64 (__lane_mask , __x );
289+ if (__first == __x ) {
290+ __match_mask = __gpu_lane_mask ();
291+ __done = 1 ;
292+ }
293+ }
294+ }
295+ __gpu_sync_lane (__lane_mask );
296+ return __match_mask ;
297+ }
298+
299+ // Returns the current lane mask if every lane contains __x.
300+ _DEFAULT_FN_ATTRS static __inline__ uint64_t
301+ __gpu_match_all_u32_impl (uint64_t __lane_mask , uint32_t __x ) {
302+ uint32_t __first = __gpu_read_first_lane_u32 (__lane_mask , __x );
303+ uint64_t __ballot = __gpu_ballot (__lane_mask , __x == __first );
304+ __gpu_sync_lane (__lane_mask );
305+ return __ballot == __gpu_lane_mask () ? __gpu_lane_mask () : 0ull ;
306+ }
307+
308+ // Returns the current lane mask if every lane contains __x.
309+ _DEFAULT_FN_ATTRS static __inline__ uint64_t
310+ __gpu_match_all_u64_impl (uint64_t __lane_mask , uint64_t __x ) {
311+ uint64_t __first = __gpu_read_first_lane_u64 (__lane_mask , __x );
312+ uint64_t __ballot = __gpu_ballot (__lane_mask , __x == __first );
313+ __gpu_sync_lane (__lane_mask );
314+ return __ballot == __gpu_lane_mask () ? __gpu_lane_mask () : 0ull ;
315+ }
316+
237317_Pragma ("omp end declare variant" );
238318_Pragma ("omp end declare target" );
239319
0 commit comments