Skip to content

Commit f905445

Browse files
committed
c89 support
1 parent 24e04fa commit f905445

File tree

5 files changed

+102
-67
lines changed

5 files changed

+102
-67
lines changed

clang/lib/Headers/amdgpuintrin.h

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,11 @@
1313
#error "This file is intended for AMDGPU targets or offloading to AMDGPU"
1414
#endif
1515

16-
#include <stdbool.h>
1716
#include <stdint.h>
1817

19-
#if !defined(_DEFAULT_FN_ATTRS)
20-
#if defined(__HIP__) || defined(__CUDA__)
21-
#define _DEFAULT_FN_ATTRS __attribute__((device))
22-
#else
23-
#define _DEFAULT_FN_ATTRS
24-
#endif
18+
#if !defined(__cplusplus)
19+
#pragma push_macro("bool")
20+
#define bool _Bool
2521
#endif
2622

2723
#pragma omp begin declare target device_type(nohost)
@@ -38,118 +34,122 @@
3834
#define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected")))
3935

4036
// Returns the number of workgroups in the 'x' dimension of the grid.
41-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_num_blocks_x(void) {
37+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
4238
return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();
4339
}
4440

4541
// Returns the number of workgroups in the 'y' dimension of the grid.
46-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_num_blocks_y(void) {
42+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_y(void) {
4743
return __builtin_amdgcn_grid_size_y() / __builtin_amdgcn_workgroup_size_y();
4844
}
4945

5046
// Returns the number of workgroups in the 'z' dimension of the grid.
51-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_num_blocks_z(void) {
47+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_z(void) {
5248
return __builtin_amdgcn_grid_size_z() / __builtin_amdgcn_workgroup_size_z();
5349
}
5450

5551
// Returns the 'x' dimension of the current AMD workgroup's id.
56-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_block_id_x(void) {
52+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_x(void) {
5753
return __builtin_amdgcn_workgroup_id_x();
5854
}
5955

6056
// Returns the 'y' dimension of the current AMD workgroup's id.
61-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_block_id_y(void) {
57+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_y(void) {
6258
return __builtin_amdgcn_workgroup_id_y();
6359
}
6460

6561
// Returns the 'z' dimension of the current AMD workgroup's id.
66-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_block_id_z(void) {
62+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id_z(void) {
6763
return __builtin_amdgcn_workgroup_id_z();
6864
}
6965

7066
// Returns the number of workitems in the 'x' dimension.
71-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_num_threads_x(void) {
67+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_x(void) {
7268
return __builtin_amdgcn_workgroup_size_x();
7369
}
7470

7571
// Returns the number of workitems in the 'y' dimension.
76-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_num_threads_y(void) {
72+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_y(void) {
7773
return __builtin_amdgcn_workgroup_size_y();
7874
}
7975

8076
// Returns the number of workitems in the 'z' dimension.
81-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_num_threads_z(void) {
77+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads_z(void) {
8278
return __builtin_amdgcn_workgroup_size_z();
8379
}
8480

8581
// Returns the 'x' dimension id of the workitem in the current AMD workgroup.
86-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_thread_id_x(void) {
82+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_x(void) {
8783
return __builtin_amdgcn_workitem_id_x();
8884
}
8985

9086
// Returns the 'y' dimension id of the workitem in the current AMD workgroup.
91-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_thread_id_y(void) {
87+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_y(void) {
9288
return __builtin_amdgcn_workitem_id_y();
9389
}
9490

9591
// Returns the 'z' dimension id of the workitem in the current AMD workgroup.
96-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_thread_id_z(void) {
92+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id_z(void) {
9793
return __builtin_amdgcn_workitem_id_z();
9894
}
9995

10096
// Returns the size of an AMD wavefront, either 32 or 64 depending on hardware
10197
// and compilation options.
102-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_num_lanes(void) {
98+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_lanes(void) {
10399
return __builtin_amdgcn_wavefrontsize();
104100
}
105101

106102
// Returns the id of the thread inside of an AMD wavefront executing together.
107-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_lane_id(void) {
103+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_lane_id(void) {
108104
return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
109105
}
110106

111107
// Returns the bit-mask of active threads in the current wavefront.
112-
_DEFAULT_FN_ATTRS static inline uint64_t __gpu_lane_mask(void) {
108+
_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_lane_mask(void) {
113109
return __builtin_amdgcn_read_exec();
114110
}
115111

116112
// Copies the value from the first active thread in the wavefront to the rest.
117-
_DEFAULT_FN_ATTRS static inline uint32_t
113+
_DEFAULT_FN_ATTRS static __inline__ uint32_t
118114
__gpu_broadcast_u32(uint64_t __lane_mask, uint32_t __x) {
119115
return __builtin_amdgcn_readfirstlane(__x);
120116
}
121117

122118
// Returns a bitmask of threads in the current wavefront for which \p __x is true.
123-
_DEFAULT_FN_ATTRS static inline uint64_t __gpu_ballot(uint64_t __lane_mask,
124-
bool __x) {
119+
_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_ballot(uint64_t __lane_mask,
120+
bool __x) {
125121
// ANDing with __lane_mask gives the NVPTX semantics when it is a subset of
126122
// the active threads
127123
return __lane_mask & __builtin_amdgcn_ballot_w64(__x);
128124
}
129125

130126
// Waits for all the threads in the block to converge and issues a fence.
131-
_DEFAULT_FN_ATTRS static inline void __gpu_sync_threads(void) {
127+
_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_threads(void) {
132128
__builtin_amdgcn_s_barrier();
133129
__builtin_amdgcn_fence(__ATOMIC_SEQ_CST, "workgroup");
134130
}
135131

136132
// Waits for all threads in the wavefront to converge; this is a no-op on AMDGPU.
137-
_DEFAULT_FN_ATTRS static inline void __gpu_sync_lane(uint64_t __lane_mask) {
133+
_DEFAULT_FN_ATTRS static __inline__ void __gpu_sync_lane(uint64_t __lane_mask) {
138134
__builtin_amdgcn_wave_barrier();
139135
}
140136

141137
// Shuffles the lanes inside the wavefront according to the given index.
142-
_DEFAULT_FN_ATTRS static inline uint32_t
138+
_DEFAULT_FN_ATTRS static __inline__ uint32_t
143139
__gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x) {
144140
return __builtin_amdgcn_ds_bpermute(__idx << 2, __x);
145141
}
146142

147143
// Terminates execution of the associated wavefront.
148-
_DEFAULT_FN_ATTRS [[noreturn]] static inline void __gpu_exit(void) {
144+
_DEFAULT_FN_ATTRS [[noreturn]] static __inline__ void __gpu_exit(void) {
149145
__builtin_amdgcn_endpgm();
150146
}
151147

152148
#pragma omp end declare variant
153149
#pragma omp end declare target
154150

151+
#if !defined(__cplusplus)
152+
#pragma pop_macro("bool")
153+
#endif
154+
155155
#endif // __AMDGPUINTRIN_H

clang/lib/Headers/gpuintrin.h

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,14 @@
1717
#ifndef __GPUINTRIN_H
1818
#define __GPUINTRIN_H
1919

20+
#if !defined(_DEFAULT_FN_ATTRS)
21+
#if defined(__HIP__) || defined(__CUDA__)
22+
#define _DEFAULT_FN_ATTRS __attribute__((device))
23+
#else
24+
#define _DEFAULT_FN_ATTRS
25+
#endif
26+
#endif
27+
2028
#if defined(__NVPTX__)
2129
#include <nvptxintrin.h>
2230
#elif defined(__AMDGPU__)
@@ -25,8 +33,15 @@
2533
#error "This header is only meant to be used on GPU architectures."
2634
#endif
2735

36+
#if !defined(__cplusplus)
37+
#pragma push_macro("bool")
38+
#define bool _Bool
39+
#endif
40+
41+
#pragma omp begin declare target device_type(nohost)
42+
2843
// Returns the number of blocks in the requested dimension.
29-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_num_blocks(int __dim) {
44+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks(int __dim) {
3045
switch (__dim) {
3146
case 0:
3247
return __gpu_num_blocks_x();
@@ -40,7 +55,7 @@ _DEFAULT_FN_ATTRS static inline uint32_t __gpu_num_blocks(int __dim) {
4055
}
4156

4257
// Returns the block id in the requested dimension.
43-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_block_id(int __dim) {
58+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_block_id(int __dim) {
4459
switch (__dim) {
4560
case 0:
4661
return __gpu_block_id_x();
@@ -54,7 +69,7 @@ _DEFAULT_FN_ATTRS static inline uint32_t __gpu_block_id(int __dim) {
5469
}
5570

5671
// Returns the number of threads in the requested dimension.
57-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_num_threads(int __dim) {
72+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_threads(int __dim) {
5873
switch (__dim) {
5974
case 0:
6075
return __gpu_num_threads_x();
@@ -68,7 +83,7 @@ _DEFAULT_FN_ATTRS static inline uint32_t __gpu_num_threads(int __dim) {
6883
}
6984

7085
// Returns the thread id in the requested dimension.
71-
_DEFAULT_FN_ATTRS static inline uint32_t __gpu_thread_id(int __dim) {
86+
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_thread_id(int __dim) {
7287
switch (__dim) {
7388
case 0:
7489
return __gpu_thread_id_x();
@@ -82,19 +97,19 @@ _DEFAULT_FN_ATTRS static inline uint32_t __gpu_thread_id(int __dim) {
8297
}
8398

8499
// Get the first active thread inside the lane.
85-
_DEFAULT_FN_ATTRS static inline uint64_t
100+
_DEFAULT_FN_ATTRS static __inline__ uint64_t
86101
__gpu_first_lane_id(uint64_t __lane_mask) {
87102
return __builtin_ffsll(__lane_mask) - 1;
88103
}
89104

90105
// Conditional that is only true for a single thread in a lane.
91-
_DEFAULT_FN_ATTRS static inline bool
106+
_DEFAULT_FN_ATTRS static __inline__ bool
92107
__gpu_is_first_in_lane(uint64_t __lane_mask) {
93108
return __gpu_lane_id() == __gpu_first_lane_id(__lane_mask);
94109
}
95110

96111
// Gets the sum of all lanes inside the warp or wavefront.
97-
_DEFAULT_FN_ATTRS static inline uint32_t
112+
_DEFAULT_FN_ATTRS static __inline__ uint32_t
98113
__gpu_lane_reduce_u32(uint64_t __lane_mask, uint32_t x) {
99114
for (uint32_t step = __gpu_num_lanes() / 2; step > 0; step /= 2) {
100115
uint32_t index = step + __gpu_lane_id();
@@ -104,7 +119,7 @@ __gpu_lane_reduce_u32(uint64_t __lane_mask, uint32_t x) {
104119
}
105120

106121
// Gets the accumulator scan of the threads in the warp or wavefront.
107-
_DEFAULT_FN_ATTRS static inline uint32_t
122+
_DEFAULT_FN_ATTRS static __inline__ uint32_t
108123
__gpu_lane_scan_u32(uint64_t __lane_mask, uint32_t x) {
109124
for (uint32_t step = 1; step < __gpu_num_lanes(); step *= 2) {
110125
uint32_t index = __gpu_lane_id() - step;
@@ -114,6 +129,12 @@ __gpu_lane_scan_u32(uint64_t __lane_mask, uint32_t x) {
114129
return x;
115130
}
116131

132+
#pragma omp end declare target
133+
134+
#if !defined(__cplusplus)
135+
#pragma pop_macro("bool")
136+
#endif
137+
117138
#undef _DEFAULT_FN_ATTRS
118139

119140
#endif // __GPUINTRIN_H

0 commit comments

Comments
 (0)