@@ -151,9 +151,7 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) {
151151_DEFAULT_FN_ATTRS static __inline__ uint32_t
152152__gpu_shuffle_idx_u32 (uint64_t __lane_mask , uint32_t __idx , uint32_t __x ) {
153153 uint32_t __mask = (uint32_t )__lane_mask ;
154- uint32_t __bitmask = (__mask >> __idx ) & 1u ;
155- return - __bitmask &
156- __nvvm_shfl_sync_idx_i32 (__mask , __x , __idx , __gpu_num_lanes () - 1u );
154+ return __nvvm_shfl_sync_idx_i32 (__mask , __x , __idx , __gpu_num_lanes () - 1u );
157155}
158156
159157// Shuffles the lanes inside the warp according to the given index.
@@ -162,10 +160,9 @@ __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x) {
162160 uint32_t __hi = (uint32_t )(__x >> 32ull );
163161 uint32_t __lo = (uint32_t )(__x & 0xFFFFFFFF );
164162 uint32_t __mask = (uint32_t )__lane_mask ;
165- uint64_t __bitmask = (__mask >> __idx ) & 1u ;
166- return - __bitmask & ((uint64_t )__nvvm_shfl_sync_idx_i32 (
167- __mask , __hi , __idx , __gpu_num_lanes () - 1u )
168- << 32ull ) |
163+ return ((uint64_t )__nvvm_shfl_sync_idx_i32 (__mask , __hi , __idx ,
164+ __gpu_num_lanes () - 1u )
165+ << 32ull ) |
169166 ((uint64_t )__nvvm_shfl_sync_idx_i32 (__mask , __lo , __idx ,
170167 __gpu_num_lanes () - 1u ));
171168}
0 commit comments