Skip to content

Commit 19b46c8

Browse files
committed
[OpenMP] Add explicit attributes to every function declaration
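Summary: Instead of relying on the file-scoped `#pragma omp assumes ext_no_call_asm`, attach the assumption explicitly (via the `OMP_ATTRS` macro) to every function declaration.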
1 parent 0acdba8 commit 19b46c8

28 files changed

+849
-736
lines changed

offload/DeviceRTL/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
9898
set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
9999
${clang_opt_flags} --offload-device-only
100100
-nocudalib -nogpulib -nogpuinc -nostdlibinc
101-
-fopenmp -fopenmp-cuda-mode
101+
-fopenmp -fopenmp-cuda-mode -Wno-unknown-assumption
102102
-Wno-unknown-cuda-version -Wno-openmp-target
103103
-DOMPTARGET_DEVICE_RUNTIME
104104
-I${include_directory}

offload/DeviceRTL/include/Allocator.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,22 +26,23 @@ namespace allocator {
2626
static uint64_t constexpr ALIGNMENT = 16;
2727

2828
/// Initialize the allocator according to \p KernelEnvironment
29-
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
29+
OMP_ATTRS void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
3030

3131
/// Allocate \p Size bytes.
32-
[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT), gnu::malloc]] void *
32+
[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT),
33+
gnu::malloc]] OMP_ATTRS void *
3334
alloc(uint64_t Size);
3435

3536
/// Free the allocation pointed to by \p Ptr.
36-
void free(void *Ptr);
37+
OMP_ATTRS void free(void *Ptr);
3738

3839
} // namespace allocator
3940

4041
} // namespace ompx
4142

4243
extern "C" {
43-
[[gnu::weak]] void *malloc(size_t Size);
44-
[[gnu::weak]] void free(void *Ptr);
44+
[[gnu::weak]] OMP_ATTRS void *malloc(size_t Size);
45+
[[gnu::weak]] OMP_ATTRS void free(void *Ptr);
4546
}
4647

4748
#pragma omp end declare target

offload/DeviceRTL/include/Configuration.h

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,45 +22,45 @@ namespace config {
2222

2323
/// Return the number of devices in the system, same number as returned on the
2424
/// host by omp_get_num_devices.
25-
uint32_t getNumDevices();
25+
OMP_ATTRS uint32_t getNumDevices();
2626

2727
/// Return the device number in the system for omp_get_device_num.
28-
uint32_t getDeviceNum();
28+
OMP_ATTRS uint32_t getDeviceNum();
2929

3030
/// Return the user chosen debug level.
31-
uint32_t getDebugKind();
31+
OMP_ATTRS uint32_t getDebugKind();
3232

3333
/// Return if teams oversubscription is assumed
34-
uint32_t getAssumeTeamsOversubscription();
34+
OMP_ATTRS uint32_t getAssumeTeamsOversubscription();
3535

3636
/// Return if threads oversubscription is assumed
37-
uint32_t getAssumeThreadsOversubscription();
37+
OMP_ATTRS uint32_t getAssumeThreadsOversubscription();
3838

3939
/// Return the amount of dynamic shared memory that was allocated at launch.
40-
uint64_t getDynamicMemorySize();
40+
OMP_ATTRS uint64_t getDynamicMemorySize();
4141

4242
/// Returns the cycles per second of the device's fixed frequency clock.
43-
uint64_t getClockFrequency();
43+
OMP_ATTRS uint64_t getClockFrequency();
4444

4545
/// Returns the pointer to the beginning of the indirect call table.
46-
void *getIndirectCallTablePtr();
46+
OMP_ATTRS void *getIndirectCallTablePtr();
4747

4848
/// Returns the size of the indirect call table.
49-
uint64_t getIndirectCallTableSize();
49+
OMP_ATTRS uint64_t getIndirectCallTableSize();
5050

5151
/// Returns the amount of hardware parallelism available on the device.
52-
uint64_t getHardwareParallelism();
52+
OMP_ATTRS uint64_t getHardwareParallelism();
5353

5454
/// Return if debugging is enabled for the given debug kind.
55-
bool isDebugMode(DeviceDebugKind Level);
55+
OMP_ATTRS bool isDebugMode(DeviceDebugKind Level);
5656

5757
/// Indicates if this kernel may require thread-specific states, or if it was
5858
/// explicitly disabled by the user.
59-
bool mayUseThreadStates();
59+
OMP_ATTRS bool mayUseThreadStates();
6060

6161
/// Indicates if this kernel may require data environments for nested
6262
/// parallelism, or if it was explicitly disabled by the user.
63-
bool mayUseNestedParallelism();
63+
OMP_ATTRS bool mayUseNestedParallelism();
6464

6565
} // namespace config
6666
} // namespace ompx

offload/DeviceRTL/include/Debug.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,12 @@
1919
///
2020
/// {
2121
extern "C" {
22-
void __assert_assume(bool condition);
23-
void __assert_fail(const char *expr, const char *file, unsigned line,
24-
const char *function);
25-
void __assert_fail_internal(const char *expr, const char *msg, const char *file,
26-
unsigned line, const char *function);
22+
OMP_ATTRS void __assert_assume(bool condition);
23+
OMP_ATTRS void __assert_fail(const char *expr, const char *file, unsigned line,
24+
const char *function);
25+
OMP_ATTRS void __assert_fail_internal(const char *expr, const char *msg,
26+
const char *file, unsigned line,
27+
const char *function);
2728
}
2829

2930
#define ASSERT(expr, msg) \

offload/DeviceRTL/include/DeviceTypes.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@
2020
// another function but only inline assembly that performs some operation or
2121
// side-effect and then continues execution with something on the existing call
2222
// stack.
23-
//
24-
// TODO: Find a good place for this
25-
#pragma omp assumes ext_no_call_asm
23+
#pragma omp begin declare variant match(device = {kind(gpu)})
24+
#define OMP_ATTRS [[omp::assume("ext_no_call_asm")]]
25+
#pragma omp end declare variant
2626

2727
enum omp_proc_bind_t {
2828
omp_proc_bind_false = 0,

offload/DeviceRTL/include/DeviceUtils.h

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -60,32 +60,35 @@ struct remove_addrspace<T [[clang::address_space(N)]]> : type_identity<T> {};
6060
template <class T>
6161
using remove_addrspace_t = typename remove_addrspace<T>::type;
6262

63-
template <typename To, typename From> inline To bitCast(From V) {
63+
template <typename To, typename From> OMP_ATTRS inline To bitCast(From V) {
6464
static_assert(sizeof(To) == sizeof(From), "Bad conversion");
6565
return __builtin_bit_cast(To, V);
6666
}
6767

6868
/// Return the value \p Var from thread Id \p SrcLane in the warp if the thread
6969
/// is identified by \p Mask.
70-
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
70+
OMP_ATTRS int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane,
71+
int32_t Width);
7172

72-
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width);
73+
OMP_ATTRS int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
74+
int32_t Width);
7375

74-
int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
76+
OMP_ATTRS int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta,
77+
int32_t Width);
7578

76-
uint64_t ballotSync(uint64_t Mask, int32_t Pred);
79+
OMP_ATTRS uint64_t ballotSync(uint64_t Mask, int32_t Pred);
7780

7881
/// Return \p LowBits and \p HighBits packed into a single 64 bit value.
79-
uint64_t pack(uint32_t LowBits, uint32_t HighBits);
82+
OMP_ATTRS uint64_t pack(uint32_t LowBits, uint32_t HighBits);
8083

8184
/// Unpack \p Val into \p LowBits and \p HighBits.
82-
void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
85+
OMP_ATTRS void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
8386

8487
/// Return true iff \p Ptr is pointing into shared (local) memory (AS(3)).
85-
bool isSharedMemPtr(void *Ptr);
88+
OMP_ATTRS bool isSharedMemPtr(void *Ptr);
8689

8790
/// Return true iff \p Ptr is pointing into (thread) local memory (AS(5)).
88-
bool isThreadLocalMemPtr(void *Ptr);
91+
OMP_ATTRS bool isThreadLocalMemPtr(void *Ptr);
8992

9093
/// A pointer variable that has by design an `undef` value. Use with care.
9194
[[clang::loader_uninitialized]] static void *const UndefPtr;

0 commit comments

Comments
 (0)