Skip to content

Commit 7ebfdf4

Browse files
committed
Update names
1 parent db8dbd1 commit 7ebfdf4

File tree

4 files changed

+35
-25
lines changed

4 files changed

+35
-25
lines changed

clang/lib/Headers/amdgpuintrin.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@
2626
#pragma omp begin declare variant match(device = {arch(amdgcn)})
2727

2828
// Type aliases to the address spaces used by the AMDGPU backend.
29-
#define __private __attribute__((opencl_private))
30-
#define __constant __attribute__((opencl_constant))
31-
#define __local __attribute__((opencl_local))
32-
#define __global __attribute__((opencl_global))
33-
#define __generic __attribute__((opencl_generic))
29+
#define __gpu_private __attribute__((opencl_private))
30+
#define __gpu_constant __attribute__((opencl_constant))
31+
#define __gpu_local __attribute__((opencl_local))
32+
#define __gpu_global __attribute__((opencl_global))
33+
#define __gpu_generic __attribute__((opencl_generic))
3434

3535
// Attribute to declare a function as a kernel.
36-
#define __kernel __attribute__((amdgpu_kernel, visibility("protected")))
36+
#define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected")))
3737

3838
// Returns the number of workgroups in the 'x' dimension of the grid.
3939
_DEFAULT_ATTRS static inline uint32_t __gpu_num_blocks_x() {
@@ -113,7 +113,7 @@ _DEFAULT_ATTRS [[clang::convergent]] static inline uint64_t __gpu_lane_mask() {
113113

114114
// Copies the value from the first active thread in the wavefront to the rest.
115115
_DEFAULT_ATTRS [[clang::convergent]] static inline uint32_t
116-
__gpu_broadcast(uint64_t __lane_mask, uint32_t __x) {
116+
__gpu_broadcast_u32(uint64_t __lane_mask, uint32_t __x) {
117117
return __builtin_amdgcn_readfirstlane(__x);
118118
}
119119

@@ -139,7 +139,7 @@ __gpu_sync_lane(uint64_t __lane_mask) {
139139

140140
// Shuffles the lanes inside the wavefront according to the given index.
141141
_DEFAULT_ATTRS [[clang::convergent]] static inline uint32_t
142-
__gpu_shuffle_idx(uint64_t __lane_mask, uint32_t __idx, uint32_t __x) {
142+
__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x) {
143143
return __builtin_amdgcn_ds_bpermute(__idx << 2, __x);
144144
}
145145

clang/lib/Headers/gpuintrin.h

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
//
9+
// Provides wrappers around the clang builtins for accessing GPU hardware
10+
// features. The interface is intended to be portable between architectures, but
11+
// some targets may provide different implementations. This header can be
12+
// included for all the common GPU programming languages, namely OpenMP, HIP,
13+
// CUDA, and OpenCL.
14+
//
15+
//===----------------------------------------------------------------------===//
816

917
#ifndef __GPUINTRIN_H
1018
#define __GPUINTRIN_H
@@ -13,6 +21,8 @@
1321
#include <nvptxintrin.h>
1422
#elif defined(__AMDGPU__)
1523
#include <amdgpuintrin.h>
24+
#else
25+
#error "This header is only meant to be used on GPU architectures."
1626
#endif
1727

1828
// Returns the total number of blocks / workgroups.
@@ -51,22 +61,22 @@ _DEFAULT_ATTRS static inline bool __gpu_is_first_lane(uint64_t __lane_mask) {
5161
}
5262

5363
// Gets the sum of all lanes inside the warp or wavefront.
54-
_DEFAULT_ATTRS static inline uint32_t __gpu_lane_reduce(uint64_t __lane_mask,
55-
uint32_t x) {
64+
_DEFAULT_ATTRS static inline uint32_t
65+
__gpu_lane_reduce_u32(uint64_t __lane_mask, uint32_t x) {
5666
for (uint32_t step = __gpu_num_lanes() / 2; step > 0; step /= 2) {
5767
uint32_t index = step + __gpu_lane_id();
58-
x += __gpu_shuffle_idx(__lane_mask, index, x);
68+
x += __gpu_shuffle_idx_u32(__lane_mask, index, x);
5969
}
60-
return __gpu_broadcast(__lane_mask, x);
70+
return __gpu_broadcast_u32(__lane_mask, x);
6171
}
6272

6373
// Gets the accumulator scan of the threads in the warp or wavefront.
64-
_DEFAULT_ATTRS static inline uint32_t __gpu_lane_scan(uint64_t __lane_mask,
65-
uint32_t x) {
74+
_DEFAULT_ATTRS static inline uint32_t __gpu_lane_scan_u32(uint64_t __lane_mask,
75+
uint32_t x) {
6676
for (uint32_t step = 1; step < __gpu_num_lanes(); step *= 2) {
6777
uint32_t index = __gpu_lane_id() - step;
6878
uint32_t bitmask = __gpu_lane_id() >= step;
69-
x += -bitmask & __gpu_shuffle_idx(__lane_mask, index, x);
79+
x += -bitmask & __gpu_shuffle_idx_u32(__lane_mask, index, x);
7080
}
7181
return x;
7282
}

clang/lib/Headers/nvptxintrin.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,14 @@
2626
#pragma omp begin declare variant match(device = {arch(nvptx64)})
2727

2828
// Type aliases to the address spaces used by the NVPTX backend.
29-
#define __private __attribute__((opencl_private))
30-
#define __constant __attribute__((opencl_constant))
31-
#define __local __attribute__((opencl_local))
32-
#define __global __attribute__((opencl_global))
33-
#define __generic __attribute__((opencl_generic))
29+
#define __gpu_private __attribute__((opencl_private))
30+
#define __gpu_constant __attribute__((opencl_constant))
31+
#define __gpu_local __attribute__((opencl_local))
32+
#define __gpu_global __attribute__((opencl_global))
33+
#define __gpu_generic __attribute__((opencl_generic))
3434

3535
// Attribute to declare a function as a kernel.
36-
#define __kernel __attribute__((amdgpu_kernel, visibility("protected")))
36+
#define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected")))
3737

3838
// Returns the number of CUDA blocks in the 'x' dimension.
3939
_DEFAULT_ATTRS static inline uint32_t __gpu_num_blocks_x() {
@@ -112,7 +112,7 @@ _DEFAULT_ATTRS [[clang::convergent]] static inline uint64_t __gpu_lane_mask() {
112112

113113
// Copies the value from the first active thread in the warp to the rest.
114114
_DEFAULT_ATTRS [[clang::convergent]] static inline uint32_t
115-
__gpu_broadcast(uint64_t __lane_mask, uint32_t __x) {
115+
__gpu_broadcast_u32(uint64_t __lane_mask, uint32_t __x) {
116116
uint32_t __mask = (uint32_t)__lane_mask;
117117
uint32_t __id = __builtin_ffs(__mask) - 1;
118118
return __nvvm_shfl_sync_idx_i32(__mask, __x, __id, __gpu_num_lanes() - 1);
@@ -138,7 +138,7 @@ __gpu_sync_lane(uint64_t __lane_mask) {
138138

139139
// Shuffles the lanes inside the warp according to the given index.
140140
_DEFAULT_ATTRS [[clang::convergent]] static inline uint32_t
141-
__gpu_shuffle_idx(uint64_t __lane_mask, uint32_t __idx, uint32_t __x) {
141+
__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x) {
142142
uint32_t __mask = (uint32_t)__lane_mask;
143143
uint32_t __bitmask = (__mask >> __idx) & 1u;
144144
return -__bitmask &

clang/test/Headers/gpuintrin.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -493,11 +493,11 @@ void foo() {
493493
__gpu_num_lanes();
494494
__gpu_lane_id();
495495
__gpu_lane_mask();
496-
__gpu_broadcast(-1, -1);
496+
__gpu_broadcast_u32(-1, -1);
497497
__gpu_ballot(-1, 1);
498498
__gpu_sync_threads();
499499
__gpu_sync_lane(-1);
500-
__gpu_shuffle_idx(-1, -1, -1);
500+
__gpu_shuffle_idx_u32(-1, -1, -1);
501501
__gpu_first_lane_id(-1);
502502
__gpu_is_first_lane(-1);
503503
__gpu_exit();

0 commit comments

Comments
 (0)