Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion offload/DeviceRTL/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
set(bc_flags -c -foffload-lto -std=c++17 -fvisibility=hidden
${clang_opt_flags} --offload-device-only
-nocudalib -nogpulib -nogpuinc -nostdlibinc
-fopenmp -fopenmp-cuda-mode
-fopenmp -fopenmp-cuda-mode -Wno-unknown-assumption
-Wno-unknown-cuda-version -Wno-openmp-target
-DOMPTARGET_DEVICE_RUNTIME
-I${include_directory}
Expand Down
11 changes: 6 additions & 5 deletions offload/DeviceRTL/include/Allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,23 @@ namespace allocator {
static uint64_t constexpr ALIGNMENT = 16;

/// Initialize the allocator according to \p KernelEnvironment
void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);
OMP_ATTRS void init(bool IsSPMD, KernelEnvironmentTy &KernelEnvironment);

/// Allocate \p Size bytes.
[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT), gnu::malloc]] void *
[[gnu::alloc_size(1), gnu::assume_aligned(ALIGNMENT),
gnu::malloc]] OMP_ATTRS void *
alloc(uint64_t Size);

/// Free the allocation pointed to by \p Ptr.
void free(void *Ptr);
OMP_ATTRS void free(void *Ptr);

} // namespace allocator

} // namespace ompx

extern "C" {
[[gnu::weak]] void *malloc(size_t Size);
[[gnu::weak]] void free(void *Ptr);
[[gnu::weak]] OMP_ATTRS void *malloc(size_t Size);
[[gnu::weak]] OMP_ATTRS void free(void *Ptr);
}

#pragma omp end declare target
Expand Down
26 changes: 13 additions & 13 deletions offload/DeviceRTL/include/Configuration.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,45 +22,45 @@ namespace config {

/// Return the number of devices in the system, same number as returned on the
/// host by omp_get_num_devices.
uint32_t getNumDevices();
OMP_ATTRS uint32_t getNumDevices();

/// Return the device number in the system for omp_get_device_num.
uint32_t getDeviceNum();
OMP_ATTRS uint32_t getDeviceNum();

/// Return the user chosen debug level.
uint32_t getDebugKind();
OMP_ATTRS uint32_t getDebugKind();

/// Return if teams oversubscription is assumed
uint32_t getAssumeTeamsOversubscription();
OMP_ATTRS uint32_t getAssumeTeamsOversubscription();

/// Return if threads oversubscription is assumed
uint32_t getAssumeThreadsOversubscription();
OMP_ATTRS uint32_t getAssumeThreadsOversubscription();

/// Return the amount of dynamic shared memory that was allocated at launch.
uint64_t getDynamicMemorySize();
OMP_ATTRS uint64_t getDynamicMemorySize();

/// Returns the cycles per second of the device's fixed frequency clock.
uint64_t getClockFrequency();
OMP_ATTRS uint64_t getClockFrequency();

/// Returns the pointer to the beginning of the indirect call table.
void *getIndirectCallTablePtr();
OMP_ATTRS void *getIndirectCallTablePtr();

/// Returns the size of the indirect call table.
uint64_t getIndirectCallTableSize();
OMP_ATTRS uint64_t getIndirectCallTableSize();

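/// Returns the hardware parallelism of the device.
/// NOTE(review): the previous text duplicated the indirect call table size
/// comment; confirm the exact unit of the returned value.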
uint64_t getHardwareParallelism();
OMP_ATTRS uint64_t getHardwareParallelism();

/// Return if debugging is enabled for the given debug kind.
bool isDebugMode(DeviceDebugKind Level);
OMP_ATTRS bool isDebugMode(DeviceDebugKind Level);

/// Indicates if this kernel may require thread-specific states, or if it was
/// explicitly disabled by the user.
bool mayUseThreadStates();
OMP_ATTRS bool mayUseThreadStates();

/// Indicates if this kernel may require data environments for nested
/// parallelism, or if it was explicitly disabled by the user.
bool mayUseNestedParallelism();
OMP_ATTRS bool mayUseNestedParallelism();

} // namespace config
} // namespace ompx
Expand Down
11 changes: 6 additions & 5 deletions offload/DeviceRTL/include/Debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,12 @@
///
/// {
extern "C" {
void __assert_assume(bool condition);
void __assert_fail(const char *expr, const char *file, unsigned line,
const char *function);
void __assert_fail_internal(const char *expr, const char *msg, const char *file,
unsigned line, const char *function);
OMP_ATTRS void __assert_assume(bool condition);
OMP_ATTRS void __assert_fail(const char *expr, const char *file, unsigned line,
const char *function);
OMP_ATTRS void __assert_fail_internal(const char *expr, const char *msg,
const char *file, unsigned line,
const char *function);
}

#define ASSERT(expr, msg) \
Expand Down
4 changes: 1 addition & 3 deletions offload/DeviceRTL/include/DeviceTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@
// another function but only inline assembly that performs some operation or
// side-effect and then continues execution with something on the existing call
// stack.
//
// TODO: Find a good place for this
#pragma omp assumes ext_no_call_asm
#define OMP_ATTRS [[omp::assume("ext_no_call_asm"), gnu::visibility("hidden")]]

enum omp_proc_bind_t {
omp_proc_bind_false = 0,
Expand Down
21 changes: 12 additions & 9 deletions offload/DeviceRTL/include/DeviceUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,32 +60,35 @@ struct remove_addrspace<T [[clang::address_space(N)]]> : type_identity<T> {};
template <class T>
using remove_addrspace_t = typename remove_addrspace<T>::type;

template <typename To, typename From> inline To bitCast(From V) {
template <typename To, typename From> OMP_ATTRS inline To bitCast(From V) {
static_assert(sizeof(To) == sizeof(From), "Bad conversion");
return __builtin_bit_cast(To, V);
}

/// Return the value \p Var from thread Id \p SrcLane in the warp if the thread
/// is identified by \p Mask.
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
OMP_ATTRS int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane,
int32_t Width);

int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width);
OMP_ATTRS int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
int32_t Width);

int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
OMP_ATTRS int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta,
int32_t Width);

uint64_t ballotSync(uint64_t Mask, int32_t Pred);
OMP_ATTRS uint64_t ballotSync(uint64_t Mask, int32_t Pred);

/// Return \p LowBits and \p HighBits packed into a single 64 bit value.
uint64_t pack(uint32_t LowBits, uint32_t HighBits);
OMP_ATTRS uint64_t pack(uint32_t LowBits, uint32_t HighBits);

/// Unpack \p Val into \p LowBits and \p HighBits.
void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);
OMP_ATTRS void unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits);

/// Return true iff \p Ptr is pointing into shared (local) memory (AS(3)).
bool isSharedMemPtr(void *Ptr);
OMP_ATTRS bool isSharedMemPtr(void *Ptr);

/// Return true iff \p Ptr is pointing into (thread) local memory (AS(5)).
bool isThreadLocalMemPtr(void *Ptr);
OMP_ATTRS bool isThreadLocalMemPtr(void *Ptr);

/// A pointer variable that has by design an `undef` value. Use with care.
[[clang::loader_uninitialized]] static void *const UndefPtr;
Expand Down
Loading
Loading