-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[OpenMP] Adds omp_target_is_accessible routine #138294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-offload Author: None (nicebert) ChangesAdds omp_target_is_accessible routine. Full diff: https://github.com/llvm/llvm-project/pull/138294.diff 5 Files Affected:
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 6971780c7bdb5..8af8c4f659b35 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -280,6 +280,7 @@ int omp_get_initial_device(void);
void *omp_target_alloc(size_t Size, int DeviceNum);
void omp_target_free(void *DevicePtr, int DeviceNum);
int omp_target_is_present(const void *Ptr, int DeviceNum);
+int omp_target_is_accessible(const void *Ptr, size_t Size, int DeviceNum);
int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
size_t DstOffset, size_t SrcOffset, int DstDevice,
int SrcDevice);
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index 4576f9bd06121..a0a126004d3f9 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -39,6 +39,8 @@ EXTERN void ompx_dump_mapping_tables() {
using namespace llvm::omp::target::ompt;
#endif
+int checkTargetAddressMapping(const void *Ptr, size_t Size, int DeviceNum, const char *Name);
+
void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
const char *Name);
void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
@@ -168,33 +170,25 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n",
DeviceNum, DPxPTR(Ptr));
- if (!Ptr) {
- DP("Call to omp_target_is_present with NULL ptr, returning false\n");
- return false;
- }
-
- if (DeviceNum == omp_get_initial_device()) {
- DP("Call to omp_target_is_present on host, returning true\n");
- return true;
- }
-
- auto DeviceOrErr = PM->getDevice(DeviceNum);
- if (!DeviceOrErr)
- FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
-
// omp_target_is_present tests whether a host pointer refers to storage that
// is mapped to a given device. However, due to the lack of the storage size,
// only check 1 byte. Cannot set size 0 which checks whether the pointer (zero
// length array) is mapped instead of the referred storage.
- TargetPointerResultTy TPR =
- DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1,
- /*UpdateRefCount=*/false,
- /*UseHoldRefCount=*/false);
- int Rc = TPR.isPresent();
- DP("Call to omp_target_is_present returns %d\n", Rc);
- return Rc;
+ return checkTargetAddressMapping(Ptr, 1, DeviceNum, "omp_target_is_present");
}
+EXTERN int omp_target_is_accessible(const void *Ptr, size_t Size, int DeviceNum) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
+ DP("Call to omp_target_is_accessible for device %d and address " DPxMOD
+ " with size %zu\n",
+ DeviceNum, DPxPTR(Ptr), Size);
+
+ // omp_target_is_accessible tests whether a host pointer refers to storage
+ // that is mapped to a given device and is accessible from the device. The
+ // storage size is provided.
+ return checkTargetAddressMapping(Ptr, Size, DeviceNum, "omp_target_is_accessible");
+}
+
EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
size_t DstOffset, size_t SrcOffset, int DstDevice,
int SrcDevice) {
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 2406776c1fb5f..0b770a2f1980a 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -37,6 +37,7 @@ VERS1.0 {
__kmpc_push_target_tripcount_mapper;
ompx_dump_mapping_tables;
omp_get_mapped_ptr;
+ omp_target_is_accessible;
omp_get_num_devices;
omp_get_device_num;
omp_get_initial_device;
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 5b25d955dd320..8716b33ce068a 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -198,6 +198,31 @@ static int32_t getParentIndex(int64_t Type) {
return ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
}
+int checkTargetAddressMapping(const void *Ptr, size_t Size, int DeviceNum, const char *Name) {
+ if (!Ptr) {
+ DP("Call to %s with NULL ptr, returning false\n", Name);
+ return false;
+ }
+
+ if (DeviceNum == omp_get_initial_device()) {
+ DP("Call to %s on host, returning true\n", Name);
+ return true;
+ }
+
+ auto DeviceOrErr = PM->getDevice(DeviceNum);
+ if (!DeviceOrErr)
+ FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
+
+ TargetPointerResultTy TPR =
+ DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), Size,
+ false,
+ false);
+
+ int Rc = TPR.isPresent();
+ DP("Call to %s returns %d\n", Name, Rc);
+ return Rc;
+}
+
void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
const char *Name) {
DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size);
diff --git a/offload/test/mapping/is_accessible.cpp b/offload/test/mapping/is_accessible.cpp
new file mode 100644
index 0000000000000..daf38e7afaf76
--- /dev/null
+++ b/offload/test/mapping/is_accessible.cpp
@@ -0,0 +1,43 @@
+// RUN: %libomptarget-compilexx-generic
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+
+// REQUIRES: unified_shared_memory
+
+#include <stdio.h>
+#include <iostream>
+#include <omp.h>
+#include <assert.h>
+
+// The runtime considers unified shared memory to be always present.
+#pragma omp requires unified_shared_memory
+
+int main() {
+ int size = 10;
+ int *x = (int *)malloc(size * sizeof(int));
+ const int dev_num = omp_get_default_device();
+
+ int is_accessible = omp_target_is_accessible(x, size * sizeof(int), dev_num);
+ int errors = 0;
+ int uses_shared_memory = 0;
+
+ #pragma omp target map(to: uses_shared_memory)
+ uses_shared_memory = 1;
+
+ assert(uses_shared_memory != is_accessible);
+
+ if (is_accessible) {
+ #pragma omp target firstprivate(x)
+ for (int i = 0; i < size; i++)
+ x[i] = i * 3;
+
+ for (int i = 0; i < size; i++)
+ errors += (x[i] == (i * 3) ? 1 : 0);
+ }
+
+ free(x);
+ // CHECK: x overwritten 0 times
+ printf("x overwritten %d times\n", errors);
+
+ return errors;
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
e3a5812
to
0a9bb0f
Compare
0a9bb0f
to
3c22b15
Compare
3c22b15
to
3c092a7
Compare
Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host).
3c092a7
to
025d36e
Compare
Co-authored-by: Shilei Tian <[email protected]>
Co-authored-by: Shilei Tian <[email protected]>
FWIW, #143058 seems like doing the same thing. |
after Monday's discussion in the Accelerator subcommittee call I'm re-working the implementation to what was discussed & create a pr to clarify the wording in the spec. |
Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host).
Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host).
…lvm-project into feat/omp_target_is_accessible
…lvm-project into feat/omp_target_is_accessible
Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host).
I think you're right. I must've messed up somewhere while rebasing |
oh I think it fixed itself by using the update branch feature on the PR @arsenm :) |
@carlobertolli can you review this. I've reworked the patch using hsa_amd_pointer_info as we discussed. @CatherineMoore FIY |
…lvm-project into feat/omp_target_is_accessible
The implemetation is allowed to return -1 for the host device number. To be complient with the spec both the device number needs to be checked against both -1 as well as the value returned by omp_get_initial_device.
…lvm-project into feat/omp_target_is_accessible
DeviceNum, DPxPTR(Ptr), Size); | ||
|
||
if (!Ptr) { | ||
DP("Call to omp_target_is_accessible with NULL ptr returning false\n"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does spec say so?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It does. OMP Specs 6.0, page 607 "If ptr is NULL, the routine returns zero". Please check the specs before asking.
// The device number must refer to a valid device | ||
auto DeviceOrErr = PM->getDevice(DeviceNum); | ||
if (!DeviceOrErr) | ||
FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder whether this is a fatal message or simple just return false? What does the spec say when the device number is invalid?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with @shiltian : let's return false if the device doesn't exist. A warning is also in order.
hsa_status_t Status = | ||
hsa_amd_pointer_info(Ptr, &Info, malloc, &Count, &Agents); | ||
|
||
if (Status != HSA_STATUS_SUCCESS) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we log here what is the reason of failing?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess that the two semantically relevant errors here would be:
HSA_EXT_POINTER_TYPE_UNKNOWN
and
HSA_STATUS_ERROR_INVALID_ARGUMENT (which we should never get because we already checked whether ptr is null). The rest of the errors "out of resources, hsa not initialized" would have been caught earlier on, is my guess
for (uint32_t i = 0; i < Count; i++) | ||
if (Agents[i].handle == getAgent().handle) | ||
return Info.sizeInBytes >= Size; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit:
for (uint32_t i = 0; i < Count; i++) | |
if (Agents[i].handle == getAgent().handle) | |
return Info.sizeInBytes >= Size; | |
for (uint32_t i = 0; i < Count; i++) { | |
if (Agents[i].handle == getAgent().handle) | |
return Info.sizeInBytes >= Size; | |
} |
/// Returns true if the plugin can guarantee that the associated | ||
/// storage is accessible | ||
bool isAccessiblePtr(const void *Ptr, size_t Size); | ||
virtual bool isAccessiblePtrImpl(const void *Ptr, size_t Size) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this impl function is only for internal use, can we make them private?
// RUN: | %fcheck-generic -check-prefix=NO_USM | ||
|
||
// REQUIRES: unified_shared_memory | ||
// REQUIRES: amdgpu |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd make this a XFAIL for nvptx instead of a requirement, since it always returns false.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With the requested changes, this patch is, in my opinion, ready to land.
DeviceNum, DPxPTR(Ptr), Size); | ||
|
||
if (!Ptr) { | ||
DP("Call to omp_target_is_accessible with NULL ptr returning false\n"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It does. OMP Specs 6.0, page 607 "If ptr is NULL, the routine returns zero". Please check the specs before asking.
// The device number must refer to a valid device | ||
auto DeviceOrErr = PM->getDevice(DeviceNum); | ||
if (!DeviceOrErr) | ||
FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with @shiltian : let's return false if the device doesn't exist. A warning is also in order.
hsa_status_t Status = | ||
hsa_amd_pointer_info(Ptr, &Info, malloc, &Count, &Agents); | ||
|
||
if (Status != HSA_STATUS_SUCCESS) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I guess that the two semantically relevant errors here would be:
HSA_EXT_POINTER_TYPE_UNKNOWN
and
HSA_STATUS_ERROR_INVALID_ARGUMENT (which we should never get because we already checked whether ptr is null). The rest of the errors "out of resources, hsa not initialized" would have been caught earlier on, is my guess
// If the pointer is unknown to HSA it's assumed a host pointer | ||
// in that case the device can access it on unified memory support is | ||
// enabled | ||
return IsXnackEnabled; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a nit: there are GPUs that do not have xnack, but still have the ability to access host memory. This behavior is fine for now, but we will have to revisit based on GPU (later, not in this PR).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then it should be documented.
Adds omp_target_is_accessible routine.
Refactors common code from omp_target_is_present to work for both routines.