Skip to content

Commit 9d97424

Browse files
committed
[OpenMP] Adds omp_target_is_accessible routine
Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host).
1 parent 29b6433 commit 9d97424

File tree

9 files changed

+110
-1
lines changed

9 files changed

+110
-1
lines changed

offload/include/device.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,9 @@ struct DeviceTy {
158158
/// Ask the device whether the runtime should use auto zero-copy.
159159
bool useAutoZeroCopy();
160160

161+
/// Ask the device whether it supports unified memory.
162+
bool supportsUnifiedMemory();
163+
161164
/// Check if there are pending images for this device.
162165
bool hasPendingImages() const { return HasPendingImages; }
163166

offload/include/omptarget.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ int omp_get_initial_device(void);
280280
void *omp_target_alloc(size_t Size, int DeviceNum);
281281
void omp_target_free(void *DevicePtr, int DeviceNum);
282282
int omp_target_is_present(const void *Ptr, int DeviceNum);
283+
int omp_target_is_accessible(const void *Ptr, size_t Size, int DeviceNum);
283284
int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
284285
size_t DstOffset, size_t SrcOffset, int DstDevice,
285286
int SrcDevice);

offload/libomptarget/OpenMP/API.cpp

Lines changed: 44 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,8 @@ EXTERN int omp_get_device_num(void) {
9494
EXTERN int omp_get_initial_device(void) {
9595
TIMESCOPE();
9696
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
97-
int HostDevice = omp_get_num_devices();
97+
int NumDevices = omp_get_num_devices();
98+
int HostDevice = NumDevices == 0 ? -1 : NumDevices;
9899
DP("Call to omp_get_initial_device returning %d\n", HostDevice);
99100
return HostDevice;
100101
}
@@ -196,6 +197,48 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
196197
return Rc;
197198
}
198199

200+
/// Check whether a pointer is accessible from a device.
201+
/// the functionality is available in OpenMP 5.1 and later
202+
/// OpenMP 5.1
203+
/// omp_target_is_accessible checks whether a host pointer is accessible from a
204+
/// device OpenMP 6.0 removes restriction on pointer, allowing any pointer
205+
/// interpreted as a pointer in the address space of the given device.
206+
EXTERN int omp_target_is_accessible(const void *Ptr, size_t Size,
207+
int DeviceNum) {
208+
TIMESCOPE();
209+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
210+
DP("Call to omp_target_is_accessible for device %d, address " DPxMOD
211+
", size %zu\n",
212+
DeviceNum, DPxPTR(Ptr), Size);
213+
214+
if (!Ptr) {
215+
DP("Call to omp_target_is_accessible with NULL ptr returning false\n");
216+
return false;
217+
}
218+
219+
if (DeviceNum == omp_get_initial_device()) {
220+
DP("Call to omp_target_is_accessible on host, returning true\n");
221+
return true;
222+
}
223+
224+
// the device number must refer to a valid device
225+
auto DeviceOrErr = PM->getDevice(DeviceNum);
226+
if (!DeviceOrErr)
227+
FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
228+
229+
// for OpenMP 5.1 the routine checks whether a host pointer is accessible from
230+
// the device this requires for the device to support unified shared memory
231+
if (DeviceOrErr->supportsUnifiedMemory()) {
232+
DP("Device %d supports unified memory, returning true\n", DeviceNum);
233+
return true;
234+
}
235+
236+
// functionality to check whether a device pointer is accessible from a device
237+
// (OpenMP 6.0) from the host might not be possible
238+
DP("Device %d does not support unified memory, returning false\n", DeviceNum);
239+
return false;
240+
}
241+
199242
EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
200243
size_t DstOffset, size_t SrcOffset, int DstDevice,
201244
int SrcDevice) {

offload/libomptarget/device.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,3 +367,7 @@ bool DeviceTy::useAutoZeroCopy() {
367367
return false;
368368
return RTL->use_auto_zero_copy(RTLDeviceID);
369369
}
370+
371+
bool DeviceTy::supportsUnifiedMemory() {
372+
return RTL->supports_unified_memory(RTLDeviceID);
373+
}

offload/libomptarget/exports

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ VERS1.0 {
4343
omp_get_initial_device;
4444
omp_target_alloc;
4545
omp_target_free;
46+
omp_target_is_accessible;
4647
omp_target_is_present;
4748
omp_target_memcpy;
4849
omp_target_memcpy_rect;

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3027,6 +3027,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
30273027
return ((IsAPU || OMPX_ApuMaps) && IsXnackEnabled);
30283028
}
30293029

3030+
bool supportsUnifiedMemoryImpl() override { return IsXnackEnabled; }
3031+
30303032
/// Getters and setters for stack and heap sizes.
30313033
Error getDeviceStackSize(uint64_t &Value) override {
30323034
Value = StackSize;

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,6 +1093,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
10931093
bool useAutoZeroCopy();
10941094
virtual bool useAutoZeroCopyImpl() { return false; }
10951095

1096+
/// Returns true if the device has unified memory capabilities
1097+
bool supportsUnifiedMemory();
1098+
virtual bool supportsUnifiedMemoryImpl() { return false; }
1099+
10961100
virtual Expected<omp_interop_val_t *>
10971101
createInterop(int32_t InteropType, interop_spec_t &InteropSpec) {
10981102
return nullptr;
@@ -1523,6 +1527,9 @@ struct GenericPluginTy {
15231527
/// Returns if the plugin can support automatic copy.
15241528
int32_t use_auto_zero_copy(int32_t DeviceId);
15251529

1530+
/// Returns if the the device supports unified memory.
1531+
int32_t supports_unified_memory(int32_t DeviceId);
1532+
15261533
/// Look up a global symbol in the given binary.
15271534
int32_t get_global(__tgt_device_binary Binary, uint64_t Size,
15281535
const char *Name, void **DevicePtr);

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1607,6 +1607,10 @@ Error GenericDeviceTy::syncEvent(void *EventPtr) {
16071607

16081608
bool GenericDeviceTy::useAutoZeroCopy() { return useAutoZeroCopyImpl(); }
16091609

1610+
bool GenericDeviceTy::supportsUnifiedMemory() {
1611+
return supportsUnifiedMemoryImpl();
1612+
}
1613+
16101614
Error GenericPluginTy::init() {
16111615
if (Initialized)
16121616
return Plugin::success();
@@ -2159,6 +2163,10 @@ int32_t GenericPluginTy::use_auto_zero_copy(int32_t DeviceId) {
21592163
return getDevice(DeviceId).useAutoZeroCopy();
21602164
}
21612165

2166+
int32_t GenericPluginTy::supports_unified_memory(int32_t DeviceId) {
2167+
return getDevice(DeviceId).supportsUnifiedMemory();
2168+
}
2169+
21622170
int32_t GenericPluginTy::get_global(__tgt_device_binary Binary, uint64_t Size,
21632171
const char *Name, void **DevicePtr) {
21642172
assert(Binary.handle && "Invalid device binary handle");
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// RUN: %libomptarget-compilexx-generic
2+
// RUN: env HSA_XNACK=1 %libomptarget-run-generic 2>&1 \
3+
// RUN: | %fcheck-generic
4+
5+
// RUN: %libomptarget-compilexx-generic
6+
// RUN: env HSA_XNACK=0 %libomptarget-run-generic 2>&1 \
7+
// RUN: | %fcheck-generic -check-prefix=NO_USM
8+
9+
// REQUIRES: unified_shared_memory
10+
// REQUIRES: amdgpu
11+
12+
// CHECK: SUCCESS
13+
// NO_USM: Not accessible
14+
15+
#include <assert.h>
16+
#include <iostream>
17+
#include <omp.h>
18+
#include <stdio.h>
19+
20+
int main() {
21+
int n = 10000;
22+
int *a = new int[n];
23+
int err = 0;
24+
25+
// program must be executed with HSA_XNACK=1
26+
if (!omp_target_is_accessible(a, n * sizeof(int), /*device_num=*/0))
27+
printf("Not accessible\n");
28+
else {
29+
#pragma omp target teams distribute parallel for
30+
for (int i = 0; i < n; i++)
31+
a[i] = i;
32+
33+
for (int i = 0; i < n; i++)
34+
if (a[i] != i)
35+
err++;
36+
}
37+
38+
printf("%s\n", err == 0 ? "SUCCESS" : "FAIL");
39+
return err;
40+
}

0 commit comments

Comments
 (0)