From f127125f0d657b4c0e3e035e4e3caae100f0256a Mon Sep 17 00:00:00 2001 From: Abhinav Gaba Date: Fri, 12 Sep 2025 14:16:32 -0700 Subject: [PATCH 1/4] [Offload][OpenMP] Support shadow-pointer tracking for Fortran descriptors. This change adds support for saving full contents of attached Fortran descriptors, and not just their pointee address, in the shadow-pointer table. With this, we now support: * comparing full contents of descriptors to check whether a previous shadow-pointer entry is stale; * restoring the full contents of descriptors With this, we can now use ATTACH map-types for properly mapping Fortran pointer/allocatable arrays, and array-sections on them. e.g.: ```f90 integer, allocatable :: x(:) !$omp target enter data map(to: x(:)) ``` as: ``` void* addr_of_pointee = allocated(x) ? &x(1) : nullptr; void* addr_of_descriptor = &x; int64_t sizeof_pointee = allocated(x) ? sizeof(x(:)) : 0 addr_of_pointee, addr_of_pointee, sizeof_pointee, TO addr_of_descriptor, addr_of_pointee, sizeof(x), ATTACH ``` --- offload/include/OpenMP/Mapping.h | 59 +++++++++++++++++++- offload/libomptarget/omptarget.cpp | 89 ++++++++++++++++++------------ 2 files changed, 109 insertions(+), 39 deletions(-) diff --git a/offload/include/OpenMP/Mapping.h b/offload/include/OpenMP/Mapping.h index 93c1e56905ae4..a08572112760d 100644 --- a/offload/include/OpenMP/Mapping.h +++ b/offload/include/OpenMP/Mapping.h @@ -49,9 +49,46 @@ class MappingConfig { /// Information about shadow pointers. struct ShadowPtrInfoTy { void **HstPtrAddr = nullptr; - void *HstPtrVal = nullptr; void **TgtPtrAddr = nullptr; - void *TgtPtrVal = nullptr; + int64_t PtrSize = sizeof(void *); // Size of the pointer/descriptor + + // Store the complete contents for both host and target pointers/descriptors. + // 128 bytes is chosen as the "Small" size to cover common Fortran + // descriptors of up to 3 dimensions. + llvm::SmallVector HstPtrContent; + llvm::SmallVector TgtPtrContent; + + ShadowPtrInfoTy(void **HstPtrAddr, void **TgtPtrAddr, void *TgtPteeBase, + int64_t PtrSize) + : HstPtrAddr(HstPtrAddr), TgtPtrAddr(TgtPtrAddr), PtrSize(PtrSize), + HstPtrContent(PtrSize), TgtPtrContent(PtrSize) { + constexpr int64_t VoidPtrSize = sizeof(void *); + assert(HstPtrAddr != nullptr && "HstPtrAddr is nullptr"); + assert(TgtPtrAddr != nullptr && "TgtPtrAddr is nullptr"); + assert(PtrSize >= VoidPtrSize && "PtrSize is less than sizeof(void *)"); + + void *HstPteeBase = *HstPtrAddr; + // The first VoidPtrSize bytes for HstPtrContent/TgtPtrContent are from + // HstPteeBase/TgtPteeBase. + std::memcpy(HstPtrContent.data(), &HstPteeBase, VoidPtrSize); + std::memcpy(TgtPtrContent.data(), &TgtPteeBase, VoidPtrSize); + + // If we are not dealing with Fortran descriptors (pointers larger than + // VoidPtrSize), then that's that. + if (PtrSize <= VoidPtrSize) + return; + + // For larger pointers, i.e. Fortran descriptors, the remaining contents of + // the descriptor come from the host descriptor, i.e. HstPtrAddr. + std::memcpy(HstPtrContent.data() + VoidPtrSize, + reinterpret_cast(HstPtrAddr) + VoidPtrSize, + PtrSize - VoidPtrSize); + std::memcpy(TgtPtrContent.data() + VoidPtrSize, + reinterpret_cast(TgtPtrAddr) + VoidPtrSize, + PtrSize - VoidPtrSize); + } + + ShadowPtrInfoTy() = delete; bool operator==(const ShadowPtrInfoTy &Other) const { return HstPtrAddr == Other.HstPtrAddr; @@ -243,9 +280,25 @@ struct HostDataToTargetTy { auto Pair = States->ShadowPtrInfos.insert(ShadowPtrInfo); if (Pair.second) return true; + // Check for a stale entry, if found, replace the old one. - if ((*Pair.first).TgtPtrVal == ShadowPtrInfo.TgtPtrVal) + + // For Fortran descriptors, we need to compare their full contents, + // as the starting address may be the same while other fields have + // been updated. e.g. + // + // !$omp target enter data map(x(1:100)) ! (1) + // p => x(10: 19) + // !$omp target enter data map(p, p(:)) ! (2) + // p => x(5: 9) + // !$omp target enter data map(attach(always): p(:)) ! (3) + // + // While &desc_p and &p(1) (TgtPtrAddr and first "sizeof(void*)" bytes of + // TgtPtrContent) are same for (2) and (3), the pointer attachment for (3) + // needs to update the bounds information in the descriptor of p on device. + if ((*Pair.first).TgtPtrContent == ShadowPtrInfo.TgtPtrContent) return false; + States->ShadowPtrInfos.erase(ShadowPtrInfo); return addShadowPointer(ShadowPtrInfo); } diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index 4c8eba1e7180c..3b522bff331ac 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -35,7 +35,6 @@ #include #include -#include using llvm::SmallVector; #ifdef OMPT_SUPPORT @@ -396,7 +395,7 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo, assert(PtrTPR.getEntry() && "Need a valid pointer entry to perform pointer-attachment"); - int64_t VoidPtrSize = sizeof(void *); + constexpr int64_t VoidPtrSize = sizeof(void *); assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small"); uint64_t Delta = reinterpret_cast(HstPteeBegin) - @@ -411,23 +410,8 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo, DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin)); // Add shadow pointer tracking - // TODO: Support shadow-tracking of larger than VoidPtrSize pointers, - // to support restoration of Fortran descriptors. Currently, this check - // would return false, even if the host Fortran descriptor had been - // updated since its previous map, and we should have updated its - // device counterpart. e.g. - // - // !$omp target enter data map(x(1:100)) ! (1) - // p => x(10: 19) - // !$omp target enter data map(p, p(:)) ! (2) - // p => x(5: 9) - // !$omp target enter data map(attach(always): p(:)) ! (3) - // - // While PtrAddr(&desc_p) and PteeBase(&p(1)) are same for (2) and (3), the - // pointer attachment for (3) needs to update the bounds information - // in the descriptor of p on device. if (!PtrTPR.getEntry()->addShadowPointer( - ShadowPtrInfoTy{HstPtrAddr, HstPteeBase, TgtPtrAddr, TgtPteeBase})) { + ShadowPtrInfoTy{HstPtrAddr, TgtPtrAddr, TgtPteeBase, HstPtrSize})) { DP("Pointer " DPxMOD " is already attached to " DPxMOD "\n", DPxPTR(TgtPtrAddr), DPxPTR(TgtPteeBase)); return OFFLOAD_SUCCESS; @@ -940,17 +924,29 @@ postProcessingTargetDataEnd(DeviceTy *Device, DelEntry = false; } - // If we copied back to the host a struct/array containing pointers, - // we need to restore the original host pointer values from their - // shadow copies. If the struct is going to be deallocated, remove any - // remaining shadow pointer entries for this struct. + // If we copied back to the host a struct/array containing pointers, or + // Fortran descriptors (which are larger than a "void *"), we need to + // restore the original host pointer/descriptor values from their shadow + // copies. If the struct is going to be deallocated, remove any remaining + // shadow pointer entries for this struct. const bool HasFrom = ArgType & OMP_TGT_MAPTYPE_FROM; if (HasFrom) { Entry->foreachShadowPointerInfo([&](const ShadowPtrInfoTy &ShadowPtr) { - *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; - DP("Restoring original host pointer value " DPxMOD " for host " - "pointer " DPxMOD "\n", - DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); + constexpr int64_t VoidPtrSize = sizeof(void *); + if (ShadowPtr.PtrSize > VoidPtrSize) { + DP("Restoring host descriptor " DPxMOD + " to its original content (%" PRId64 + " bytes), containing pointee address " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize, + DPxPTR(ShadowPtr.HstPtrContent.data())); + } else { + DP("Restoring host pointer " DPxMOD " to its original value " DPxMOD + "\n", + DPxPTR(ShadowPtr.HstPtrAddr), + DPxPTR(ShadowPtr.HstPtrContent.data())); + } + std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(), + ShadowPtr.PtrSize); return OFFLOAD_SUCCESS; }); } @@ -1163,12 +1159,22 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, if (TPR.getEntry()) { int Ret = TPR.getEntry()->foreachShadowPointerInfo( [&](ShadowPtrInfoTy &ShadowPtr) { - DP("Restoring original target pointer value " DPxMOD " for target " - "pointer " DPxMOD "\n", - DPxPTR(ShadowPtr.TgtPtrVal), DPxPTR(ShadowPtr.TgtPtrAddr)); + constexpr int64_t VoidPtrSize = sizeof(void *); + if (ShadowPtr.PtrSize > VoidPtrSize) { + DP("Restoring target descriptor " DPxMOD + " to its original content (%" PRId64 + " bytes), containing pointee address " DPxMOD "\n", + DPxPTR(ShadowPtr.TgtPtrAddr), ShadowPtr.PtrSize, + DPxPTR(ShadowPtr.TgtPtrContent.data())); + } else { + DP("Restoring target pointer " DPxMOD + " to its original value " DPxMOD "\n", + DPxPTR(ShadowPtr.TgtPtrAddr), + DPxPTR(ShadowPtr.TgtPtrContent.data())); + } Ret = Device.submitData(ShadowPtr.TgtPtrAddr, - (void *)&ShadowPtr.TgtPtrVal, - sizeof(void *), AsyncInfo); + ShadowPtr.TgtPtrContent.data(), + ShadowPtr.PtrSize, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL; @@ -1193,15 +1199,26 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, } // Wait for device-to-host memcopies for whole struct to complete, - // before restoring the correct host pointer. + // before restoring the correct host pointer/descriptor. if (auto *Entry = TPR.getEntry()) { AsyncInfo.addPostProcessingFunction([=]() -> int { int Ret = Entry->foreachShadowPointerInfo( [&](const ShadowPtrInfoTy &ShadowPtr) { - *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; - DP("Restoring original host pointer value " DPxMOD - " for host pointer " DPxMOD "\n", - DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); + constexpr int64_t VoidPtrSize = sizeof(void *); + if (ShadowPtr.PtrSize > VoidPtrSize) { + DP("Restoring host descriptor " DPxMOD + " to its original content (%" PRId64 + " bytes), containing pointee address " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize, + DPxPTR(ShadowPtr.HstPtrContent.data())); + } else { + DP("Restoring host pointer " DPxMOD + " to its original value " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrAddr), + DPxPTR(ShadowPtr.HstPtrContent.data())); + } + std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(), + ShadowPtr.PtrSize); return OFFLOAD_SUCCESS; }); Entry->unlock(); From afa872fcad9820d02f2e043e6d349156f049489d Mon Sep 17 00:00:00 2001 From: Abhinav Gaba Date: Fri, 12 Sep 2025 14:47:24 -0700 Subject: [PATCH 2/4] Change the 'small' size for SmallVectors. --- offload/include/OpenMP/Mapping.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/offload/include/OpenMP/Mapping.h b/offload/include/OpenMP/Mapping.h index a08572112760d..82f509e518789 100644 --- a/offload/include/OpenMP/Mapping.h +++ b/offload/include/OpenMP/Mapping.h @@ -53,10 +53,10 @@ struct ShadowPtrInfoTy { int64_t PtrSize = sizeof(void *); // Size of the pointer/descriptor // Store the complete contents for both host and target pointers/descriptors. - // 128 bytes is chosen as the "Small" size to cover common Fortran + // 96 bytes is chosen as the "Small" size to cover simple Fortran // descriptors of up to 3 dimensions. - llvm::SmallVector HstPtrContent; - llvm::SmallVector TgtPtrContent; + llvm::SmallVector HstPtrContent; + llvm::SmallVector TgtPtrContent; ShadowPtrInfoTy(void **HstPtrAddr, void **TgtPtrAddr, void *TgtPteeBase, int64_t PtrSize) From 26c71d277c1087c7eeb7b5f121759a5da9c2c9c8 Mon Sep 17 00:00:00 2001 From: Abhinav Gaba Date: Fri, 12 Sep 2025 14:49:10 -0700 Subject: [PATCH 3/4] Remove unrelated change --- offload/libomptarget/omptarget.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index 3b522bff331ac..39286d41ec865 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -35,6 +35,7 @@ #include #include +#include using llvm::SmallVector; #ifdef OMPT_SUPPORT From 0c32767afa2806e5c2b53e0dffdd6ab9105d9c45 Mon Sep 17 00:00:00 2001 From: Abhinav Gaba Date: Sat, 13 Sep 2025 02:26:15 -0700 Subject: [PATCH 4/4] Copy remaining bytes for TgtPtrAddr from from host descriptor, as the comment said. --- offload/include/OpenMP/Mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/include/OpenMP/Mapping.h b/offload/include/OpenMP/Mapping.h index 82f509e518789..45bd9c6e7da8b 100644 --- a/offload/include/OpenMP/Mapping.h +++ b/offload/include/OpenMP/Mapping.h @@ -84,7 +84,7 @@ struct ShadowPtrInfoTy { reinterpret_cast(HstPtrAddr) + VoidPtrSize, PtrSize - VoidPtrSize); std::memcpy(TgtPtrContent.data() + VoidPtrSize, - reinterpret_cast(TgtPtrAddr) + VoidPtrSize, + reinterpret_cast(HstPtrAddr) + VoidPtrSize, PtrSize - VoidPtrSize); }