@@ -180,8 +180,9 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x,
180
180
_DEFAULT_FN_ATTRS static __inline__ uint64_t
181
181
__gpu_match_any_u32 (uint64_t __lane_mask , uint32_t __x ) {
182
182
// Targets with __CUDA_ARCH__ >= 700 can use the dedicated NVVM match-any builtin.
183
- if (__CUDA_ARCH__ >= 700 || __nvvm_reflect ("__CUDA_ARCH" ) >= 700 )
184
- return __nvvm_match_any_sync_i32 (__lane_mask , __x );
183
+ #if __CUDA_ARCH__ >= 700
184
+ return __nvvm_match_any_sync_i32 (__lane_mask , __x );
185
+ #endif
185
186
186
187
uint32_t __match_mask = 0 ;
187
188
bool __done = 0 ;
@@ -201,8 +202,9 @@ __gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) {
201
202
_DEFAULT_FN_ATTRS static __inline__ uint64_t
202
203
__gpu_match_any_u64 (uint64_t __lane_mask , uint64_t __x ) {
203
204
// Targets with __CUDA_ARCH__ >= 700 can use the dedicated NVVM match-any builtin.
204
- if (__CUDA_ARCH__ >= 700 || __nvvm_reflect ("__CUDA_ARCH" ) >= 700 )
205
- return __nvvm_match_any_sync_i64 (__lane_mask , __x );
205
+ #if __CUDA_ARCH__ >= 700
206
+ return __nvvm_match_any_sync_i64 (__lane_mask , __x );
207
+ #endif
206
208
207
209
uint64_t __match_mask = 0 ;
208
210
@@ -224,9 +226,10 @@ __gpu_match_any_u64(uint64_t __lane_mask, uint64_t __x) {
224
226
_DEFAULT_FN_ATTRS static __inline__ uint64_t
225
227
__gpu_match_all_u32 (uint64_t __lane_mask , uint32_t __x ) {
226
228
// Targets with __CUDA_ARCH__ >= 700 can use the dedicated NVVM match-all builtin.
229
+ #if __CUDA_ARCH__ >= 700
227
230
int predicate ;
228
- if ( __CUDA_ARCH__ >= 700 || __nvvm_reflect ( "__CUDA_ARCH" ) >= 700 )
229
- return __nvvm_match_all_sync_i32p ( __lane_mask , __x , & predicate );
231
+ return __nvvm_match_all_sync_i32p ( __lane_mask , __x , & predicate );
232
+ #endif
230
233
231
234
uint32_t __first = __gpu_read_first_lane_u64 (__lane_mask , __x );
232
235
uint64_t __ballot = __gpu_ballot (__lane_mask , __x == __first );
@@ -237,9 +240,10 @@ __gpu_match_all_u32(uint64_t __lane_mask, uint32_t __x) {
237
240
_DEFAULT_FN_ATTRS static __inline__ uint64_t
238
241
__gpu_match_all_u64 (uint64_t __lane_mask , uint64_t __x ) {
239
242
// Targets with __CUDA_ARCH__ >= 700 can use the dedicated NVVM match-all builtin.
243
+ #if __CUDA_ARCH__ >= 700
240
244
int predicate ;
241
- if ( __CUDA_ARCH__ >= 700 || __nvvm_reflect ( "__CUDA_ARCH" ) >= 700 )
242
- return __nvvm_match_all_sync_i64p ( __lane_mask , __x , & predicate );
245
+ return __nvvm_match_all_sync_i64p ( __lane_mask , __x , & predicate );
246
+ #endif
243
247
244
248
uint64_t __first = __gpu_read_first_lane_u64 (__lane_mask , __x );
245
249
uint64_t __ballot = __gpu_ballot (__lane_mask , __x == __first );
0 commit comments