Skip to content

Commit 4777579

Browse files
abhinavgabaronlieb
authored andcommitted
[Offload][OpenMP] Support shadow-pointer tracking for Fortran descriptors. (llvm#158370)
This change adds support for saving full contents of attached Fortran descriptors, and not just their pointee address, in the shadow-pointer table. With this, we now support: * comparing full contents of descriptors to check whether a previous shadow-pointer entry is stale; * restoring the full contents of descriptors And with that, we can now use ATTACH map-types (added in llvm#149036) for mapping Fortran pointer/allocatable arrays, and array-sections on them. e.g.: ```f90 integer, allocatable :: x(:) !$omp target enter data map(to: x(:)) ``` as: ``` void* addr_of_pointee = allocated(x) ? &x(1) : nullptr; int64_t sizeof_pointee = allocated(x) ? sizeof(x(:)) : 0 addr_of_pointee, addr_of_pointee, sizeof_pointee, TO addr_of_descriptor, addr_of_pointee, size_of_descriptor, ATTACH ```
1 parent 7e6479d commit 4777579

File tree

3 files changed

+111
-43
lines changed

3 files changed

+111
-43
lines changed

offload/include/OpenMP/Mapping.h

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,46 @@ class MappingConfig {
4949
/// Information about shadow pointers.
5050
struct ShadowPtrInfoTy {
5151
void **HstPtrAddr = nullptr;
52-
void *HstPtrVal = nullptr;
5352
void **TgtPtrAddr = nullptr;
54-
void *TgtPtrVal = nullptr;
53+
int64_t PtrSize = sizeof(void *); // Size of the pointer/descriptor
54+
55+
// Store the complete contents for both host and target pointers/descriptors.
56+
// 96 bytes is chosen as the "Small" size to cover simple Fortran
57+
// descriptors of up to 3 dimensions.
58+
llvm::SmallVector<char, 96> HstPtrContent;
59+
llvm::SmallVector<char, 96> TgtPtrContent;
60+
61+
ShadowPtrInfoTy(void **HstPtrAddr, void **TgtPtrAddr, void *TgtPteeBase,
62+
int64_t PtrSize)
63+
: HstPtrAddr(HstPtrAddr), TgtPtrAddr(TgtPtrAddr), PtrSize(PtrSize),
64+
HstPtrContent(PtrSize), TgtPtrContent(PtrSize) {
65+
constexpr int64_t VoidPtrSize = sizeof(void *);
66+
assert(HstPtrAddr != nullptr && "HstPtrAddr is nullptr");
67+
assert(TgtPtrAddr != nullptr && "TgtPtrAddr is nullptr");
68+
assert(PtrSize >= VoidPtrSize && "PtrSize is less than sizeof(void *)");
69+
70+
void *HstPteeBase = *HstPtrAddr;
71+
// The first VoidPtrSize bytes for HstPtrContent/TgtPtrContent are from
72+
// HstPteeBase/TgtPteeBase.
73+
std::memcpy(HstPtrContent.data(), &HstPteeBase, VoidPtrSize);
74+
std::memcpy(TgtPtrContent.data(), &TgtPteeBase, VoidPtrSize);
75+
76+
// If we are not dealing with Fortran descriptors (pointers larger than
77+
// VoidPtrSize), then that's that.
78+
if (PtrSize <= VoidPtrSize)
79+
return;
80+
81+
// For larger pointers, i.e. Fortran descriptors, the remaining contents of
82+
// the descriptor come from the host descriptor, i.e. HstPtrAddr.
83+
std::memcpy(HstPtrContent.data() + VoidPtrSize,
84+
reinterpret_cast<char *>(HstPtrAddr) + VoidPtrSize,
85+
PtrSize - VoidPtrSize);
86+
std::memcpy(TgtPtrContent.data() + VoidPtrSize,
87+
reinterpret_cast<char *>(HstPtrAddr) + VoidPtrSize,
88+
PtrSize - VoidPtrSize);
89+
}
90+
91+
ShadowPtrInfoTy() = delete;
5592

5693
bool operator==(const ShadowPtrInfoTy &Other) const {
5794
return HstPtrAddr == Other.HstPtrAddr;
@@ -245,9 +282,25 @@ struct HostDataToTargetTy {
245282
auto Pair = States->ShadowPtrInfos.insert(ShadowPtrInfo);
246283
if (Pair.second)
247284
return true;
285+
248286
// Check for a stale entry, if found, replace the old one.
249-
if ((*Pair.first).TgtPtrVal == ShadowPtrInfo.TgtPtrVal)
287+
288+
// For Fortran descriptors, we need to compare their full contents,
289+
// as the starting address may be the same while other fields have
290+
// been updated. e.g.
291+
//
292+
// !$omp target enter data map(x(1:100)) ! (1)
293+
// p => x(10: 19)
294+
// !$omp target enter data map(p, p(:)) ! (2)
295+
// p => x(5: 9)
296+
// !$omp target enter data map(attach(always): p(:)) ! (3)
297+
//
298+
// While &desc_p and &p(1) (TgtPtrAddr and first "sizeof(void*)" bytes of
299+
// TgtPtrContent) are same for (2) and (3), the pointer attachment for (3)
300+
// needs to update the bounds information in the descriptor of p on device.
301+
if ((*Pair.first).TgtPtrContent == ShadowPtrInfo.TgtPtrContent)
250302
return false;
303+
251304
States->ShadowPtrInfos.erase(ShadowPtrInfo);
252305
return addShadowPointer(ShadowPtrInfo);
253306
}

offload/libomptarget/omptarget.cpp

Lines changed: 55 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,8 @@ static int prepareAndSubmitData(DeviceTy &Device, void *HstPtrBegin,
319319
void *ExpectedTgtPtrBase = (void *)((uint64_t)LocalTgtPtrBegin - Delta);
320320

321321
if (PointerTpr.getEntry()->addShadowPointer(
322-
ShadowPtrInfoTy{(void **)PointerHstPtrBegin, HstPtrBase,
323-
(void **)PointerTgtPtrBegin, ExpectedTgtPtrBase})) {
322+
ShadowPtrInfoTy{(void **)PointerHstPtrBegin,
323+
(void **)PointerTgtPtrBegin, ExpectedTgtPtrBase, sizeof(void*)})) {
324324
DP("USM_SPECIAL: Update pointer (" DPxMOD ") -> [" DPxMOD "]\n",
325325
DPxPTR(PointerTgtPtrBegin), DPxPTR(LocalTgtPtrBegin));
326326

@@ -407,7 +407,7 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
407407
assert(PtrTPR.getEntry() &&
408408
"Need a valid pointer entry to perform pointer-attachment");
409409

410-
int64_t VoidPtrSize = sizeof(void *);
410+
constexpr int64_t VoidPtrSize = sizeof(void *);
411411
assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
412412

413413
uint64_t Delta = reinterpret_cast<uint64_t>(HstPteeBegin) -
@@ -422,23 +422,8 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
422422
DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin));
423423

424424
// Add shadow pointer tracking
425-
// TODO: Support shadow-tracking of larger than VoidPtrSize pointers,
426-
// to support restoration of Fortran descriptors. Currently, this check
427-
// would return false, even if the host Fortran descriptor had been
428-
// updated since its previous map, and we should have updated its
429-
// device counterpart. e.g.
430-
//
431-
// !$omp target enter data map(x(1:100)) ! (1)
432-
// p => x(10: 19)
433-
// !$omp target enter data map(p, p(:)) ! (2)
434-
// p => x(5: 9)
435-
// !$omp target enter data map(attach(always): p(:)) ! (3)
436-
//
437-
// While PtrAddr(&desc_p) and PteeBase(&p(1)) are same for (2) and (3), the
438-
// pointer attachment for (3) needs to update the bounds information
439-
// in the descriptor of p on device.
440425
if (!PtrTPR.getEntry()->addShadowPointer(
441-
ShadowPtrInfoTy{HstPtrAddr, HstPteeBase, TgtPtrAddr, TgtPteeBase})) {
426+
ShadowPtrInfoTy{HstPtrAddr, TgtPtrAddr, TgtPteeBase, HstPtrSize})) {
442427
DP("Pointer " DPxMOD " is already attached to " DPxMOD "\n",
443428
DPxPTR(TgtPtrAddr), DPxPTR(TgtPteeBase));
444429
return OFFLOAD_SUCCESS;
@@ -969,10 +954,11 @@ postProcessingTargetDataEnd(DeviceTy *Device,
969954
DelEntry = false;
970955
}
971956

972-
// If we copied back to the host a struct/array containing pointers,
973-
// we need to restore the original host pointer values from their
974-
// shadow copies. If the struct is going to be deallocated, remove any
975-
// remaining shadow pointer entries for this struct.
957+
// If we copied back to the host a struct/array containing pointers, or
958+
// Fortran descriptors (which are larger than a "void *"), we need to
959+
// restore the original host pointer/descriptor values from their shadow
960+
// copies. If the struct is going to be deallocated, remove any remaining
961+
// shadow pointer entries for this struct.
976962
const bool HasFrom = ArgType & OMP_TGT_MAPTYPE_FROM;
977963
if (HasFrom) {
978964
Entry->foreachShadowPointerInfo([&](const ShadowPtrInfoTy &ShadowPtr) {
@@ -981,10 +967,21 @@ postProcessingTargetDataEnd(DeviceTy *Device,
981967
PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY;
982968
if (*ShadowPtr.HstPtrAddr == nullptr || isZeroCopy || isUSMMode)
983969
return OFFLOAD_SUCCESS;
984-
*ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal;
985-
DP("Restoring original host pointer value " DPxMOD " for host "
986-
"pointer " DPxMOD "\n",
987-
DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr));
970+
constexpr int64_t VoidPtrSize = sizeof(void *);
971+
if (ShadowPtr.PtrSize > VoidPtrSize) {
972+
DP("Restoring host descriptor " DPxMOD
973+
" to its original content (%" PRId64
974+
" bytes), containing pointee address " DPxMOD "\n",
975+
DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize,
976+
DPxPTR(ShadowPtr.HstPtrContent.data()));
977+
} else {
978+
DP("Restoring host pointer " DPxMOD " to its original value " DPxMOD
979+
"\n",
980+
DPxPTR(ShadowPtr.HstPtrAddr),
981+
DPxPTR(ShadowPtr.HstPtrContent.data()));
982+
}
983+
std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(),
984+
ShadowPtr.PtrSize);
988985
return OFFLOAD_SUCCESS;
989986
});
990987
}
@@ -1197,12 +1194,22 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
11971194
if (TPR.getEntry()) {
11981195
int Ret = TPR.getEntry()->foreachShadowPointerInfo(
11991196
[&](ShadowPtrInfoTy &ShadowPtr) {
1200-
DP("Restoring original target pointer value " DPxMOD " for target "
1201-
"pointer " DPxMOD "\n",
1202-
DPxPTR(ShadowPtr.TgtPtrVal), DPxPTR(ShadowPtr.TgtPtrAddr));
1197+
constexpr int64_t VoidPtrSize = sizeof(void *);
1198+
if (ShadowPtr.PtrSize > VoidPtrSize) {
1199+
DP("Restoring target descriptor " DPxMOD
1200+
" to its original content (%" PRId64
1201+
" bytes), containing pointee address " DPxMOD "\n",
1202+
DPxPTR(ShadowPtr.TgtPtrAddr), ShadowPtr.PtrSize,
1203+
DPxPTR(ShadowPtr.TgtPtrContent.data()));
1204+
} else {
1205+
DP("Restoring target pointer " DPxMOD
1206+
" to its original value " DPxMOD "\n",
1207+
DPxPTR(ShadowPtr.TgtPtrAddr),
1208+
DPxPTR(ShadowPtr.TgtPtrContent.data()));
1209+
}
12031210
Ret = Device.submitData(ShadowPtr.TgtPtrAddr,
1204-
(void *)&ShadowPtr.TgtPtrVal,
1205-
sizeof(void *), AsyncInfo);
1211+
ShadowPtr.TgtPtrContent.data(),
1212+
ShadowPtr.PtrSize, AsyncInfo);
12061213
if (Ret != OFFLOAD_SUCCESS) {
12071214
REPORT("Copying data to device failed.\n");
12081215
return OFFLOAD_FAIL;
@@ -1227,7 +1234,7 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
12271234
}
12281235

12291236
// Wait for device-to-host memcopies for whole struct to complete,
1230-
// before restoring the correct host pointer.
1237+
// before restoring the correct host pointer/descriptor.
12311238
if (auto *Entry = TPR.getEntry()) {
12321239
AsyncInfo.addPostProcessingFunction([=]() -> int {
12331240
int Ret = Entry->foreachShadowPointerInfo(
@@ -1238,10 +1245,21 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
12381245
PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY;
12391246
if (*ShadowPtr.HstPtrAddr == nullptr || isZeroCopy || isUSMMode)
12401247
return OFFLOAD_SUCCESS;
1241-
*ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal;
1242-
DP("Restoring original host pointer value " DPxMOD
1243-
" for host pointer " DPxMOD "\n",
1244-
DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr));
1248+
constexpr int64_t VoidPtrSize = sizeof(void *);
1249+
if (ShadowPtr.PtrSize > VoidPtrSize) {
1250+
DP("Restoring host descriptor " DPxMOD
1251+
" to its original content (%" PRId64
1252+
" bytes), containing pointee address " DPxMOD "\n",
1253+
DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize,
1254+
DPxPTR(ShadowPtr.HstPtrContent.data()));
1255+
} else {
1256+
DP("Restoring host pointer " DPxMOD
1257+
" to its original value " DPxMOD "\n",
1258+
DPxPTR(ShadowPtr.HstPtrAddr),
1259+
DPxPTR(ShadowPtr.HstPtrContent.data()));
1260+
}
1261+
std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(),
1262+
ShadowPtr.PtrSize);
12451263
return OFFLOAD_SUCCESS;
12461264
});
12471265
Entry->unlock();

revert_patches.txt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,3 @@ d57230c7 [AMDGPU][MC] Disallow op_sel in some VOP3P dot instructions (#100485)
1515
Revert "[SLP]Support LShr as base for copyable elements"
1616
breaks build rocPRIM
1717
---
18-
build issues
19-
5af3fa81cc12 [Offload][OpenMP] Support shadow-pointer tracking
20-
---

0 commit comments

Comments
 (0)