diff --git a/offload/include/device.h b/offload/include/device.h index bf93ce0460aef..4e27943d1dbc1 100644 --- a/offload/include/device.h +++ b/offload/include/device.h @@ -158,6 +158,9 @@ struct DeviceTy { /// Ask the device whether the runtime should use auto zero-copy. bool useAutoZeroCopy(); + /// Ask the device whether the storage is accessible. + bool isAccessiblePtr(const void *Ptr, size_t Size); + /// Check if there are pending images for this device. bool hasPendingImages() const { return HasPendingImages; } diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h index 8fd722bb15022..6328e29127aa4 100644 --- a/offload/include/omptarget.h +++ b/offload/include/omptarget.h @@ -280,6 +280,7 @@ int omp_get_initial_device(void); void *omp_target_alloc(size_t Size, int DeviceNum); void omp_target_free(void *DevicePtr, int DeviceNum); int omp_target_is_present(const void *Ptr, int DeviceNum); +int omp_target_is_accessible(const void *Ptr, size_t Size, int DeviceNum); int omp_target_memcpy(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice); diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp index b0f0573833713..48b086d671285 100644 --- a/offload/libomptarget/OpenMP/API.cpp +++ b/offload/libomptarget/OpenMP/API.cpp @@ -196,6 +196,34 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { return Rc; } +/// Check whether a pointer is accessible from a device. +/// Returns true when accessibility is guaranteed otherwise returns false. +EXTERN int omp_target_is_accessible(const void *Ptr, size_t Size, + int DeviceNum) { + TIMESCOPE(); + OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0))); + DP("Call to omp_target_is_accessible for device %d, address " DPxMOD + ", size %zu\n", + DeviceNum, DPxPTR(Ptr), Size); + + if (!Ptr) { + DP("Call to omp_target_is_accessible with NULL ptr returning false\n"); + return false; + } + + if (DeviceNum == omp_get_initial_device() || DeviceNum == -1) { + DP("Call to omp_target_is_accessible on host, returning true\n"); + return true; + } + + // The device number must refer to a valid device + auto DeviceOrErr = PM->getDevice(DeviceNum); + if (!DeviceOrErr) + FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); + + return DeviceOrErr->isAccessiblePtr(Ptr, Size); +} + EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length, size_t DstOffset, size_t SrcOffset, int DstDevice, int SrcDevice) { diff --git a/offload/libomptarget/device.cpp b/offload/libomptarget/device.cpp index 71423ae0c94d9..ee36fbed935a5 100644 --- a/offload/libomptarget/device.cpp +++ b/offload/libomptarget/device.cpp @@ -367,3 +367,7 @@ bool DeviceTy::useAutoZeroCopy() { return false; return RTL->use_auto_zero_copy(RTLDeviceID); } + +bool DeviceTy::isAccessiblePtr(const void *Ptr, size_t Size) { + return RTL->is_accessible_ptr(RTLDeviceID, Ptr, Size); +} diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports index 8e2db6ba8bba4..95ddd03bb46a3 100644 --- a/offload/libomptarget/exports +++ b/offload/libomptarget/exports @@ -43,6 +43,7 @@ VERS1.0 { omp_get_initial_device; omp_target_alloc; omp_target_free; + omp_target_is_accessible; omp_target_is_present; omp_target_memcpy; omp_target_memcpy_rect; diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 1d33bfc1a0be9..bdac35ae971c4 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -3027,6 +3027,29 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return ((IsAPU || OMPX_ApuMaps) && IsXnackEnabled); } + bool isAccessiblePtrImpl(const void *Ptr, size_t Size) override { + hsa_amd_pointer_info_t Info; + Info.size = sizeof(hsa_amd_pointer_info_t); + + hsa_agent_t *Agents = nullptr; + uint32_t Count = 0; + hsa_status_t Status = + hsa_amd_pointer_info(Ptr, &Info, malloc, &Count, &Agents); + + if (Status != HSA_STATUS_SUCCESS) + return false; + + // Checks if the pointer is known by HSA and accessible by the device + for (uint32_t i = 0; i < Count; i++) + if (Agents[i].handle == getAgent().handle) + return Info.sizeInBytes >= Size; + + // If the pointer is unknown to HSA it's assumed a host pointer + // in that case the device can access it on unified memory support is + // enabled + return IsXnackEnabled; + } + /// Getters and setters for stack and heap sizes. Error getDeviceStackSize(uint64_t &Value) override { Value = StackSize; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index ce66d277d6187..907c21ec662d3 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -1069,6 +1069,13 @@ struct GenericDeviceTy : public DeviceAllocatorTy { bool useAutoZeroCopy(); virtual bool useAutoZeroCopyImpl() { return false; } + /// Returns true if the plugin can guarantee that the associated + /// storage is accessible + bool isAccessiblePtr(const void *Ptr, size_t Size); + virtual bool isAccessiblePtrImpl(const void *Ptr, size_t Size) { + return false; + } + virtual Expected createInterop(int32_t InteropType, interop_spec_t &InteropSpec) { return nullptr; @@ -1499,6 +1506,9 @@ struct GenericPluginTy { /// Returns if the plugin can support automatic copy. int32_t use_auto_zero_copy(int32_t DeviceId); + /// Returns if the associated storage is accessible for a given device. + int32_t is_accessible_ptr(int32_t DeviceId, const void *Ptr, size_t Size); + /// Look up a global symbol in the given binary. int32_t get_global(__tgt_device_binary Binary, uint64_t Size, const char *Name, void **DevicePtr); diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 9f830874d5dad..0ab3d8dc4c69c 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -1593,6 +1593,10 @@ Error GenericDeviceTy::syncEvent(void *EventPtr) { bool GenericDeviceTy::useAutoZeroCopy() { return useAutoZeroCopyImpl(); } +bool GenericDeviceTy::isAccessiblePtr(const void *Ptr, size_t Size) { + return isAccessiblePtrImpl(Ptr, Size); +} + Error GenericPluginTy::init() { if (Initialized) return Plugin::success(); @@ -2147,6 +2151,11 @@ int32_t GenericPluginTy::use_auto_zero_copy(int32_t DeviceId) { return getDevice(DeviceId).useAutoZeroCopy(); } +int32_t GenericPluginTy::is_accessible_ptr(int32_t DeviceId, const void *Ptr, + size_t Size) { + return getDevice(DeviceId).isAccessiblePtr(Ptr, Size); +} + int32_t GenericPluginTy::get_global(__tgt_device_binary Binary, uint64_t Size, const char *Name, void **DevicePtr) { assert(Binary.handle && "Invalid device binary handle"); diff --git a/offload/test/mapping/is_accessible.cpp b/offload/test/mapping/is_accessible.cpp new file mode 100644 index 0000000000000..6d6a0048e01f3 --- /dev/null +++ b/offload/test/mapping/is_accessible.cpp @@ -0,0 +1,40 @@ +// RUN: %libomptarget-compilexx-generic +// RUN: env HSA_XNACK=1 %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic + +// RUN: %libomptarget-compilexx-generic +// RUN: env HSA_XNACK=0 %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefix=NO_USM + +// REQUIRES: unified_shared_memory +// REQUIRES: amdgpu + +// CHECK: SUCCESS +// NO_USM: Not accessible + +#include +#include +#include +#include + +int main() { + int n = 10000; + int *a = new int[n]; + int err = 0; + + // program must be executed with HSA_XNACK=1 + if (!omp_target_is_accessible(a, n * sizeof(int), /*device_num=*/0)) + printf("Not accessible\n"); + else { +#pragma omp target teams distribute parallel for + for (int i = 0; i < n; i++) + a[i] = i; + + for (int i = 0; i < n; i++) + if (a[i] != i) + err++; + } + + printf("%s\n", err == 0 ? "SUCCESS" : "FAIL"); + return err; +}