Skip to content

Commit 025d36e

Browse files
committed
[OpenMP] Adds omp_target_is_accessible routine
Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host).
1 parent a22d010 commit 025d36e

File tree

10 files changed

+111
-2
lines changed

10 files changed

+111
-2
lines changed

clang/docs/OpenMPSupport.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ implementation.
256256
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
257257
| device | device-specific environment variables | :none:`unclaimed` | |
258258
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
259-
| device | omp_target_is_accessible routine | :none:`unclaimed` | |
259+
| device | omp_target_is_accessible routine | :part:`worked on` | https://github.com/llvm/llvm-project/pull/138294 |
260260
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+
261261
| device | omp_get_mapped_ptr routine | :good:`done` | D141545 |
262262
+------------------------------+--------------------------------------------------------------+--------------------------+-----------------------------------------------------------------------+

offload/include/device.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ struct DeviceTy {
152152
/// Ask the device whether the runtime should use auto zero-copy.
153153
bool useAutoZeroCopy();
154154

155+
/// Ask the device whether it supports unified memory.
156+
bool supportsUnifiedMemory();
157+
155158
/// Check if there are pending images for this device.
156159
bool hasPendingImages() const { return HasPendingImages; }
157160

offload/include/omptarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ int omp_get_initial_device(void);
280280
void *omp_target_alloc(size_t Size, int DeviceNum);
281281
void omp_target_free(void *DevicePtr, int DeviceNum);
282282
int omp_target_is_present(const void *Ptr, int DeviceNum);
283+
int omp_target_is_accessible(const void *Ptr, size_t Size, int DeviceNum);
283284
int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
284285
size_t DstOffset, size_t SrcOffset, int DstDevice,
285286
int SrcDevice);

offload/libomptarget/OpenMP/API.cpp

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ EXTERN int omp_get_device_num(void) {
9393
EXTERN int omp_get_initial_device(void) {
9494
TIMESCOPE();
9595
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
96-
int HostDevice = omp_get_num_devices();
96+
int NumDevices = omp_get_num_devices();
97+
int HostDevice = NumDevices == 0 ? -1 : NumDevices;
9798
DP("Call to omp_get_initial_device returning %d\n", HostDevice);
9899
return HostDevice;
99100
}
@@ -195,6 +196,48 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
195196
return Rc;
196197
}
197198

199+
/// Check whether a pointer is accessible from a device.
200+
/// the functionality is available in OpenMP 5.1 and later
201+
/// OpenMP 5.1
202+
/// omp_target_is_accessible checks whether a host pointer is accessible from a
203+
/// device OpenMP 6.0 removes restriction on pointer, allowing any pointer
204+
/// interpreted as a pointer in the address space of the given device.
205+
EXTERN int omp_target_is_accessible(const void *Ptr, size_t Size,
206+
int DeviceNum) {
207+
TIMESCOPE();
208+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
209+
DP("Call to omp_target_is_accessible for device %d, address " DPxMOD
210+
", size %zu\n",
211+
DeviceNum, DPxPTR(Ptr), Size);
212+
213+
if (!Ptr) {
214+
DP("Call to omp_target_is_accessible with NULL ptr returning false\n");
215+
return false;
216+
}
217+
218+
if (DeviceNum == omp_get_initial_device()) {
219+
DP("Call to omp_target_is_accessible on host, returning true\n");
220+
return true;
221+
}
222+
223+
// the device number must refer to a valid device
224+
auto DeviceOrErr = PM->getDevice(DeviceNum);
225+
if (!DeviceOrErr)
226+
FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
227+
228+
// for OpenMP 5.1 the routine checks whether a host pointer is accessible from
229+
// the device this requires for the device to support unified shared memory
230+
if (DeviceOrErr->supportsUnifiedMemory()) {
231+
DP("Device %d supports unified memory, returning true\n", DeviceNum);
232+
return true;
233+
}
234+
235+
// functionality to check whether a device pointer is accessible from a device
236+
// (OpenMP 6.0) from the host might not be possible
237+
DP("Device %d does not support unified memory, returning false\n", DeviceNum);
238+
return false;
239+
}
240+
198241
EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
199242
size_t DstOffset, size_t SrcOffset, int DstDevice,
200243
int SrcDevice) {

offload/libomptarget/device.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,3 +281,7 @@ bool DeviceTy::useAutoZeroCopy() {
281281
return false;
282282
return RTL->use_auto_zero_copy(RTLDeviceID);
283283
}
284+
285+
bool DeviceTy::supportsUnifiedMemory() {
286+
return RTL->supports_unified_memory(RTLDeviceID);
287+
}

offload/libomptarget/exports

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ VERS1.0 {
4242
omp_get_initial_device;
4343
omp_target_alloc;
4444
omp_target_free;
45+
omp_target_is_accessible;
4546
omp_target_is_present;
4647
omp_target_memcpy;
4748
omp_target_memcpy_rect;

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2821,6 +2821,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
28212821
return ((IsAPU || OMPX_ApuMaps) && IsXnackEnabled);
28222822
}
28232823

2824+
bool supportsUnifiedMemoryImpl() override { return IsXnackEnabled; }
2825+
28242826
/// Getters and setters for stack and heap sizes.
28252827
Error getDeviceStackSize(uint64_t &Value) override {
28262828
Value = StackSize;

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,6 +1003,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
10031003
bool useAutoZeroCopy();
10041004
virtual bool useAutoZeroCopyImpl() { return false; }
10051005

1006+
/// Returns true if the device has unified memory capabilities
1007+
bool supportsUnifiedMemory();
1008+
virtual bool supportsUnifiedMemoryImpl() { return false; }
1009+
10061010
/// Allocate and construct a kernel object.
10071011
virtual Expected<GenericKernelTy &> constructKernel(const char *Name) = 0;
10081012

@@ -1402,6 +1406,9 @@ struct GenericPluginTy {
14021406
/// Returns if the plugin can support automatic copy.
14031407
int32_t use_auto_zero_copy(int32_t DeviceId);
14041408

1409+
/// Returns if the the device supports unified memory.
1410+
int32_t supports_unified_memory(int32_t DeviceId);
1411+
14051412
/// Look up a global symbol in the given binary.
14061413
int32_t get_global(__tgt_device_binary Binary, uint64_t Size,
14071414
const char *Name, void **DevicePtr);

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,6 +1629,10 @@ Error GenericDeviceTy::syncEvent(void *EventPtr) {
16291629

16301630
bool GenericDeviceTy::useAutoZeroCopy() { return useAutoZeroCopyImpl(); }
16311631

1632+
bool GenericDeviceTy::supportsUnifiedMemory() {
1633+
return supportsUnifiedMemoryImpl();
1634+
}
1635+
16321636
Error GenericPluginTy::init() {
16331637
if (Initialized)
16341638
return Plugin::success();
@@ -2181,6 +2185,10 @@ int32_t GenericPluginTy::use_auto_zero_copy(int32_t DeviceId) {
21812185
return getDevice(DeviceId).useAutoZeroCopy();
21822186
}
21832187

2188+
int32_t GenericPluginTy::supports_unified_memory(int32_t DeviceId) {
2189+
return getDevice(DeviceId).supportsUnifiedMemory();
2190+
}
2191+
21842192
int32_t GenericPluginTy::get_global(__tgt_device_binary Binary, uint64_t Size,
21852193
const char *Name, void **DevicePtr) {
21862194
assert(Binary.handle && "Invalid device binary handle");
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// RUN: %libomptarget-compilexx-generic
2+
// RUN: env HSA_XNACK=1 %libomptarget-run-generic 2>&1 \
3+
// RUN: | %fcheck-generic
4+
5+
// RUN: %libomptarget-compilexx-generic
6+
// RUN: env HSA_XNACK=0 %libomptarget-run-generic 2>&1 \
7+
// RUN: | %fcheck-generic -check-prefix=NO_USM
8+
9+
// REQUIRES: unified_shared_memory
10+
// REQUIRES: amdgpu
11+
12+
// CHECK: SUCCESS
13+
// NO_USM: Not accessible
14+
15+
#include <assert.h>
16+
#include <iostream>
17+
#include <omp.h>
18+
#include <stdio.h>
19+
20+
int main() {
21+
int n = 10000;
22+
int *a = new int[n];
23+
int err = 0;
24+
25+
// program must be executed with HSA_XNACK=1
26+
if (!omp_target_is_accessible(a, n * sizeof(int), /*device_num=*/0))
27+
printf("Not accessible\n");
28+
else {
29+
#pragma omp target teams distribute parallel for
30+
for (int i = 0; i < n; i++)
31+
a[i] = i;
32+
33+
for (int i = 0; i < n; i++)
34+
if (a[i] != i)
35+
err++;
36+
}
37+
38+
printf("%s\n", err == 0 ? "SUCCESS" : "FAIL");
39+
return err;
40+
}

0 commit comments

Comments
 (0)