diff --git a/offload/DeviceRTL/include/DeviceUtils.h b/offload/DeviceRTL/include/DeviceUtils.h index 549ca16e1c34c..dd3d02448fe55 100644 --- a/offload/DeviceRTL/include/DeviceUtils.h +++ b/offload/DeviceRTL/include/DeviceUtils.h @@ -29,6 +29,9 @@ int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width); uint64_t ballotSync(uint64_t Mask, int32_t Pred); +/// Terminate the execution of this warp. +void terminateWarp(); + /// Return \p LowBits and \p HighBits packed into a single 64 bit value. uint64_t pack(uint32_t LowBits, uint32_t HighBits); diff --git a/offload/DeviceRTL/src/DeviceUtils.cpp b/offload/DeviceRTL/src/DeviceUtils.cpp index c204a7be73b1f..d325447cbf8e1 100644 --- a/offload/DeviceRTL/src/DeviceUtils.cpp +++ b/offload/DeviceRTL/src/DeviceUtils.cpp @@ -38,6 +38,7 @@ int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta, int32_t Width); uint64_t ballotSync(uint64_t Mask, int32_t Pred); +void terminateWarp(); /// AMDGCN Implementation /// @@ -62,6 +63,8 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred) { return Mask & __builtin_amdgcn_ballot_w64(Pred); } +void terminateWarp() { __builtin_amdgcn_endpgm(); } + bool isSharedMemPtr(const void *Ptr) { return __builtin_amdgcn_is_shared( (const __attribute__((address_space(0))) void *)Ptr); @@ -89,6 +92,8 @@ uint64_t ballotSync(uint64_t Mask, int32_t Pred) { return __nvvm_vote_ballot_sync(static_cast<uint32_t>(Mask), Pred); } +void terminateWarp() { __nvvm_exit(); } + bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); } #pragma omp end declare variant @@ -126,6 +131,8 @@ uint64_t utils::ballotSync(uint64_t Mask, int32_t Pred) { return impl::ballotSync(Mask, Pred); } +void utils::terminateWarp() { return impl::terminateWarp(); } + bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); } extern "C" { diff --git a/offload/DeviceRTL/src/Sanitizer.cpp b/offload/DeviceRTL/src/Sanitizer.cpp index d524eed0d9a93..6d68a383b56f3 100644 --- 
a/offload/DeviceRTL/src/Sanitizer.cpp +++ b/offload/DeviceRTL/src/Sanitizer.cpp @@ -13,7 +13,6 @@ #include "Mapping.h" #include "Shared/Environment.h" #include "Synchronization.h" -#include "gpuintrin.h" using namespace ompx; @@ -69,7 +68,7 @@ void raiseExecutionError(SanitizerEnvironmentTy::ErrorCodeTy ErrorCode, // If no thread of this warp has the lock, end execution gracefully. bool AnyThreadHasLock = utils::ballotSync(lanes::All, HasLock); if (!AnyThreadHasLock) - __gpu_exit(); + utils::terminateWarp(); // One thread will set the location information and signal that the rest of // the wapr that the actual trap can be executed now. diff --git a/offload/test/jit/type_punning.c b/offload/test/jit/type_punning.c index 574168b8a69cb..46f8ba62d0df1 100644 --- a/offload/test/jit/type_punning.c +++ b/offload/test/jit/type_punning.c @@ -13,8 +13,8 @@ // Ensure that there is only the kernel function left, not any outlined // parallel regions. // -// CHECK: define -// CHECK-NOT: define +// CHECK: define {{.*}}offloading +// CHECK-NOT: define {{.*}}offloading #include #include diff --git a/offload/test/sanitizer/kernel_trap_all.c b/offload/test/sanitizer/kernel_trap_all.c index 379ca8362aa83..4f3fc84784987 100644 --- a/offload/test/sanitizer/kernel_trap_all.c +++ b/offload/test/sanitizer/kernel_trap_all.c @@ -3,15 +3,9 @@ // RUN: %libomptarget-compile-generic -g -mllvm -amdgpu-enable-offload-sanitizer // RUN: %not --crash env -u LLVM_DISABLE_SYMBOLIZATION OFFLOAD_TRACK_NUM_KERNEL_LAUNCH_TRACES=1 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK,TRACE,DEBUG // RUN: %not --crash %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefixes=CHECK +// clang-format on -// UNSUPPORTED: nvptx64-nvidia-cuda -// UNSUPPORTED: nvptx64-nvidia-cuda-LTO -// UNSUPPORTED: aarch64-unknown-linux-gnu -// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO -// UNSUPPORTED: x86_64-pc-linux-gnu -// UNSUPPORTED: x86_64-pc-linux-gnu-LTO -// UNSUPPORTED: s390x-ibm-linux-gnu -// 
UNSUPPORTED: s390x-ibm-linux-gnu-LTO +// REQUIRES: amdgpu #include @@ -23,9 +17,11 @@ int main(void) { __builtin_trap(); } } -// CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_l20) +// clang-format off +// CHECK: OFFLOAD ERROR: Kernel {{.*}} (__omp_offloading_{{.*}}_main_{{.*}}) // CHECK: OFFLOAD ERROR: execution interrupted by hardware trap instruction // CHECK: Triggered by thread <{{[0-9]*}},0,0> block <{{[0-9]*}},0,0> PC 0x{{.*}} // TRACE: launchKernel // NDEBG: main // DEBUG: main {{.*}}kernel_trap_all.c: +// clang-format on