-
Notifications
You must be signed in to change notification settings - Fork 15k
[OpenMP] Adds omp_target_is_accessible routine #138294
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-offload Author: None (nicebert) ChangesAdds omp_target_is_accessible routine. Full diff: https://github.com/llvm/llvm-project/pull/138294.diff 5 Files Affected:
diff --git a/offload/include/omptarget.h b/offload/include/omptarget.h
index 6971780c7bdb5..8af8c4f659b35 100644
--- a/offload/include/omptarget.h
+++ b/offload/include/omptarget.h
@@ -280,6 +280,7 @@ int omp_get_initial_device(void);
void *omp_target_alloc(size_t Size, int DeviceNum);
void omp_target_free(void *DevicePtr, int DeviceNum);
int omp_target_is_present(const void *Ptr, int DeviceNum);
+int omp_target_is_accessible(const void *Ptr, size_t Size, int DeviceNum);
int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
size_t DstOffset, size_t SrcOffset, int DstDevice,
int SrcDevice);
diff --git a/offload/libomptarget/OpenMP/API.cpp b/offload/libomptarget/OpenMP/API.cpp
index 4576f9bd06121..a0a126004d3f9 100644
--- a/offload/libomptarget/OpenMP/API.cpp
+++ b/offload/libomptarget/OpenMP/API.cpp
@@ -39,6 +39,8 @@ EXTERN void ompx_dump_mapping_tables() {
using namespace llvm::omp::target::ompt;
#endif
+int checkTargetAddressMapping(const void *Ptr, size_t Size, int DeviceNum, const char *Name);
+
void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
const char *Name);
void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
@@ -168,33 +170,25 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) {
DP("Call to omp_target_is_present for device %d and address " DPxMOD "\n",
DeviceNum, DPxPTR(Ptr));
- if (!Ptr) {
- DP("Call to omp_target_is_present with NULL ptr, returning false\n");
- return false;
- }
-
- if (DeviceNum == omp_get_initial_device()) {
- DP("Call to omp_target_is_present on host, returning true\n");
- return true;
- }
-
- auto DeviceOrErr = PM->getDevice(DeviceNum);
- if (!DeviceOrErr)
- FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
-
// omp_target_is_present tests whether a host pointer refers to storage that
// is mapped to a given device. However, due to the lack of the storage size,
// only check 1 byte. Cannot set size 0 which checks whether the pointer (zero
// length array) is mapped instead of the referred storage.
- TargetPointerResultTy TPR =
- DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), 1,
- /*UpdateRefCount=*/false,
- /*UseHoldRefCount=*/false);
- int Rc = TPR.isPresent();
- DP("Call to omp_target_is_present returns %d\n", Rc);
- return Rc;
+ return checkTargetAddressMapping(Ptr, 1, DeviceNum, "omp_target_is_present");
}
+EXTERN int omp_target_is_accessible(const void *Ptr, size_t Size, int DeviceNum) {
+ OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
+ DP("Call to omp_target_is_accessible for device %d and address " DPxMOD
+ " with size %zu\n",
+ DeviceNum, DPxPTR(Ptr), Size);
+
+ // omp_target_is_accessible tests whether a host pointer refers to storage
+ // that is mapped to a given device and is accessible from the device. The
+ // storage size is provided.
+ return checkTargetAddressMapping(Ptr, Size, DeviceNum, "omp_target_is_accessible");
+}
+
EXTERN int omp_target_memcpy(void *Dst, const void *Src, size_t Length,
size_t DstOffset, size_t SrcOffset, int DstDevice,
int SrcDevice) {
diff --git a/offload/libomptarget/exports b/offload/libomptarget/exports
index 2406776c1fb5f..0b770a2f1980a 100644
--- a/offload/libomptarget/exports
+++ b/offload/libomptarget/exports
@@ -37,6 +37,7 @@ VERS1.0 {
__kmpc_push_target_tripcount_mapper;
ompx_dump_mapping_tables;
omp_get_mapped_ptr;
+ omp_target_is_accessible;
omp_get_num_devices;
omp_get_device_num;
omp_get_initial_device;
diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp
index 5b25d955dd320..8716b33ce068a 100644
--- a/offload/libomptarget/omptarget.cpp
+++ b/offload/libomptarget/omptarget.cpp
@@ -198,6 +198,31 @@ static int32_t getParentIndex(int64_t Type) {
return ((Type & OMP_TGT_MAPTYPE_MEMBER_OF) >> 48) - 1;
}
+int checkTargetAddressMapping(const void *Ptr, size_t Size, int DeviceNum, const char *Name) {
+ if (!Ptr) {
+ DP("Call to %s with NULL ptr, returning false\n", Name);
+ return false;
+ }
+
+ if (DeviceNum == omp_get_initial_device()) {
+ DP("Call to %s on host, returning true\n", Name);
+ return true;
+ }
+
+ auto DeviceOrErr = PM->getDevice(DeviceNum);
+ if (!DeviceOrErr)
+ FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str());
+
+ TargetPointerResultTy TPR =
+ DeviceOrErr->getMappingInfo().getTgtPtrBegin(const_cast<void *>(Ptr), Size,
+ false,
+ false);
+
+ int Rc = TPR.isPresent();
+ DP("Call to %s returns %d\n", Name, Rc);
+ return Rc;
+}
+
void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
const char *Name) {
DP("Call to %s for device %d requesting %zu bytes\n", Name, DeviceNum, Size);
diff --git a/offload/test/mapping/is_accessible.cpp b/offload/test/mapping/is_accessible.cpp
new file mode 100644
index 0000000000000..daf38e7afaf76
--- /dev/null
+++ b/offload/test/mapping/is_accessible.cpp
@@ -0,0 +1,43 @@
+// RUN: %libomptarget-compilexx-generic
+// RUN: %libomptarget-run-generic 2>&1 \
+// RUN: | %fcheck-generic
+
+// REQUIRES: unified_shared_memory
+
+#include <stdio.h>
+#include <iostream>
+#include <omp.h>
+#include <assert.h>
+
+// The runtime considers unified shared memory to be always present.
+#pragma omp requires unified_shared_memory
+
+int main() {
+ int size = 10;
+ int *x = (int *)malloc(size * sizeof(int));
+ const int dev_num = omp_get_default_device();
+
+ int is_accessible = omp_target_is_accessible(x, size * sizeof(int), dev_num);
+ int errors = 0;
+ int uses_shared_memory = 0;
+
+ #pragma omp target map(to: uses_shared_memory)
+ uses_shared_memory = 1;
+
+ assert(uses_shared_memory != is_accessible);
+
+ if (is_accessible) {
+ #pragma omp target firstprivate(x)
+ for (int i = 0; i < size; i++)
+ x[i] = i * 3;
+
+ for (int i = 0; i < size; i++)
+ errors += (x[i] == (i * 3) ? 1 : 0);
+ }
+
+ free(x);
+ // CHECK: x overwritten 0 times
+ printf("x overwritten %d times\n", errors);
+
+ return errors;
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
e3a5812 to
0a9bb0f
Compare
0a9bb0f to
3c22b15
Compare
3c22b15 to
3c092a7
Compare
Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host).
3c092a7 to
025d36e
Compare
Co-authored-by: Shilei Tian <[email protected]>
Co-authored-by: Shilei Tian <[email protected]>
|
FWIW, #143058 seems like doing the same thing. |
|
after Monday's discussion in the Accelerator subcommittee call I'm re-working the implementation to what was discussed & create a pr to clarify the wording in the spec. |
Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host).
Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host).
…lvm-project into feat/omp_target_is_accessible
…lvm-project into feat/omp_target_is_accessible
Adds implementation of omp_target_is_accessible routine with 5.1 behaviour, checking if a host pointer is acccessible from a device without running on the device (from the host).
|
@carlobertolli can you review this. I've reworked the patch using hsa_amd_pointer_info as we discussed. @CatherineMoore FIY |
…lvm-project into feat/omp_target_is_accessible
The implemetation is allowed to return -1 for the host device number. To be complient with the spec both the device number needs to be checked against both -1 as well as the value returned by omp_get_initial_device.
…lvm-project into feat/omp_target_is_accessible
| // The device number must refer to a valid device | ||
| auto DeviceOrErr = PM->getDevice(DeviceNum); | ||
| if (!DeviceOrErr) | ||
| FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder whether this is a fatal message or simple just return false? What does the spec say when the device number is invalid?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with @shiltian : let's return false if the device doesn't exist. A warning is also in order.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can make this change but then we probably need to reconsider how we handle invalid devices in all cases in the runtime since in the API.cpp file alone it's handled in this way for:
is_present
memcpy
(dis)associate_ptr
get_mapped_ptr
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is a question to spec. If spec doesn't say anything, my $.02 is to return false. We probably don't want a program to crash immediately with invalid input.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That doesn't change the fact that we have those other functions that don't state that the program is to crash immediately with invalid input. Do we then need to adjust those functions as well to return false and emit a warning? I don't think it's sensible to have one function act in one way and one in another if the device is invalid.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes indeed. We definitely don't want inconsistent behavior. We need to update them all (in a separate PR for others which may also server as an RFC).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think crashing is the better behavior for several reasons.
-
It is in line with what the other routines do and also how kernel invocations fail when the device number does not exist.
-
While returning would be OK, because passing an incorrect device number does constitute unspecified behavior and the implementation is allowed to not check for this to happen, it may make debugging much harder. The failing code will return a seemingly useful result and thus introduce some inconsistent state in the program. I would expect that shortly after the code would re-use incorrect device number to invoke a kernel and thus fail there. So, we can also fail early.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think that is different. For a target region, yes, crash is okay, since the spec doesn't seem to say anything about how to deal with invalid input in clauses, based on my understanding (might be out of date though). APIs, on the other hand, is different. I think it'd not be a good idea someone calls some functions with invalid input and then their program crashes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this is a discussion we need to have in another setting than this issue as it does not just affect this API routine but existing routines (of which I stated a couple of examples above). My implementation follows the way this is currently handled so I think if you want to change the handling the discussion needs to be had in a different setting with a bigger audience.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair enough. CC @jhuber6 Maybe a topic for the next Wed meeting. :-)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With the requested changes, this patch is, in my opinion, ready to land.
| // The device number must refer to a valid device | ||
| auto DeviceOrErr = PM->getDevice(DeviceNum); | ||
| if (!DeviceOrErr) | ||
| FATAL_MESSAGE(DeviceNum, "%s", toString(DeviceOrErr.takeError()).c_str()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with @shiltian : let's return false if the device doesn't exist. A warning is also in order.
| // If the pointer is unknown to HSA it's assumed a host pointer | ||
| // in that case the device can access it on unified memory support is | ||
| // enabled | ||
| return IsXnackEnabled; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a nit: there are GPUs that do not have xnack, but still have the ability to access host memory. This behavior is fine for now, but we will have to revisit based on GPU (later, not in this PR).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then it should be documented.
Co-authored-by: Shilei Tian <[email protected]>
Addresses rewiev comments. Changes LIT test to XFAIL on nvptx. Changes implementation to provide better debug information to the user about failures.
…lvm-project into feat/omp_target_is_accessible
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Marks omp_target_is_accessible routine as done.
…lvm-project into feat/omp_target_is_accessible
Adds omp_target_is_accessible routine.
Refactors common code from omp_target_is_present to work for both routines.