Skip to content

Commit de80b87

Browse files
committed
merge main into amd-staging
2 parents 39be8c6 + 5af3fa8 commit de80b87

File tree

2 files changed

+109
-49
lines changed

2 files changed

+109
-49
lines changed

offload/include/OpenMP/Mapping.h

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,46 @@ class MappingConfig {
4949
/// Information about shadow pointers.
5050
struct ShadowPtrInfoTy {
5151
void **HstPtrAddr = nullptr;
52-
void *HstPtrVal = nullptr;
5352
void **TgtPtrAddr = nullptr;
54-
void *TgtPtrVal = nullptr;
53+
int64_t PtrSize = sizeof(void *); // Size of the pointer/descriptor
54+
55+
// Store the complete contents for both host and target pointers/descriptors.
56+
// 96 bytes is chosen as the "Small" size to cover simple Fortran
57+
// descriptors of up to 3 dimensions.
58+
llvm::SmallVector<char, 96> HstPtrContent;
59+
llvm::SmallVector<char, 96> TgtPtrContent;
60+
61+
ShadowPtrInfoTy(void **HstPtrAddr, void **TgtPtrAddr, void *TgtPteeBase,
62+
int64_t PtrSize)
63+
: HstPtrAddr(HstPtrAddr), TgtPtrAddr(TgtPtrAddr), PtrSize(PtrSize),
64+
HstPtrContent(PtrSize), TgtPtrContent(PtrSize) {
65+
constexpr int64_t VoidPtrSize = sizeof(void *);
66+
assert(HstPtrAddr != nullptr && "HstPtrAddr is nullptr");
67+
assert(TgtPtrAddr != nullptr && "TgtPtrAddr is nullptr");
68+
assert(PtrSize >= VoidPtrSize && "PtrSize is less than sizeof(void *)");
69+
70+
void *HstPteeBase = *HstPtrAddr;
71+
// The first VoidPtrSize bytes for HstPtrContent/TgtPtrContent are from
72+
// HstPteeBase/TgtPteeBase.
73+
std::memcpy(HstPtrContent.data(), &HstPteeBase, VoidPtrSize);
74+
std::memcpy(TgtPtrContent.data(), &TgtPteeBase, VoidPtrSize);
75+
76+
// If we are not dealing with Fortran descriptors (pointers larger than
77+
// VoidPtrSize), then that's that.
78+
if (PtrSize <= VoidPtrSize)
79+
return;
80+
81+
// For larger pointers, i.e. Fortran descriptors, the remaining contents of
82+
// the descriptor come from the host descriptor, i.e. HstPtrAddr.
83+
std::memcpy(HstPtrContent.data() + VoidPtrSize,
84+
reinterpret_cast<char *>(HstPtrAddr) + VoidPtrSize,
85+
PtrSize - VoidPtrSize);
86+
std::memcpy(TgtPtrContent.data() + VoidPtrSize,
87+
reinterpret_cast<char *>(HstPtrAddr) + VoidPtrSize,
88+
PtrSize - VoidPtrSize);
89+
}
90+
91+
ShadowPtrInfoTy() = delete;
5592

5693
bool operator==(const ShadowPtrInfoTy &Other) const {
5794
return HstPtrAddr == Other.HstPtrAddr;
@@ -245,9 +282,25 @@ struct HostDataToTargetTy {
245282
auto Pair = States->ShadowPtrInfos.insert(ShadowPtrInfo);
246283
if (Pair.second)
247284
return true;
285+
248286
// Check for a stale entry, if found, replace the old one.
249-
if ((*Pair.first).TgtPtrVal == ShadowPtrInfo.TgtPtrVal)
287+
288+
// For Fortran descriptors, we need to compare their full contents,
289+
// as the starting address may be the same while other fields have
290+
// been updated. e.g.
291+
//
292+
// !$omp target enter data map(x(1:100)) ! (1)
293+
// p => x(10: 19)
294+
// !$omp target enter data map(p, p(:)) ! (2)
295+
// p => x(5: 9)
296+
// !$omp target enter data map(attach(always): p(:)) ! (3)
297+
//
298+
// While &desc_p and &p(1) (TgtPtrAddr and first "sizeof(void*)" bytes of
299+
// TgtPtrContent) are same for (2) and (3), the pointer attachment for (3)
300+
// needs to update the bounds information in the descriptor of p on device.
301+
if ((*Pair.first).TgtPtrContent == ShadowPtrInfo.TgtPtrContent)
250302
return false;
303+
251304
States->ShadowPtrInfos.erase(ShadowPtrInfo);
252305
return addShadowPointer(ShadowPtrInfo);
253306
}

offload/libomptarget/omptarget.cpp

Lines changed: 53 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,7 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
407407
assert(PtrTPR.getEntry() &&
408408
"Need a valid pointer entry to perform pointer-attachment");
409409

410-
int64_t VoidPtrSize = sizeof(void *);
410+
constexpr int64_t VoidPtrSize = sizeof(void *);
411411
assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");
412412

413413
uint64_t Delta = reinterpret_cast<uint64_t>(HstPteeBegin) -
@@ -422,23 +422,8 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
422422
DPxPTR(TgtPteeBase), DPxPTR(TgtPteeBegin));
423423

424424
// Add shadow pointer tracking
425-
// TODO: Support shadow-tracking of larger than VoidPtrSize pointers,
426-
// to support restoration of Fortran descriptors. Currently, this check
427-
// would return false, even if the host Fortran descriptor had been
428-
// updated since its previous map, and we should have updated its
429-
// device counterpart. e.g.
430-
//
431-
// !$omp target enter data map(x(1:100)) ! (1)
432-
// p => x(10: 19)
433-
// !$omp target enter data map(p, p(:)) ! (2)
434-
// p => x(5: 9)
435-
// !$omp target enter data map(attach(always): p(:)) ! (3)
436-
//
437-
// While PtrAddr(&desc_p) and PteeBase(&p(1)) are same for (2) and (3), the
438-
// pointer attachment for (3) needs to update the bounds information
439-
// in the descriptor of p on device.
440425
if (!PtrTPR.getEntry()->addShadowPointer(
441-
ShadowPtrInfoTy{HstPtrAddr, HstPteeBase, TgtPtrAddr, TgtPteeBase})) {
426+
ShadowPtrInfoTy{HstPtrAddr, TgtPtrAddr, TgtPteeBase, HstPtrSize})) {
442427
DP("Pointer " DPxMOD " is already attached to " DPxMOD "\n",
443428
DPxPTR(TgtPtrAddr), DPxPTR(TgtPteeBase));
444429
return OFFLOAD_SUCCESS;
@@ -969,22 +954,29 @@ postProcessingTargetDataEnd(DeviceTy *Device,
969954
DelEntry = false;
970955
}
971956

972-
// If we copied back to the host a struct/array containing pointers,
973-
// we need to restore the original host pointer values from their
974-
// shadow copies. If the struct is going to be deallocated, remove any
975-
// remaining shadow pointer entries for this struct.
957+
// If we copied back to the host a struct/array containing pointers, or
958+
// Fortran descriptors (which are larger than a "void *"), we need to
959+
// restore the original host pointer/descriptor values from their shadow
960+
// copies. If the struct is going to be deallocated, remove any remaining
961+
// shadow pointer entries for this struct.
976962
const bool HasFrom = ArgType & OMP_TGT_MAPTYPE_FROM;
977963
if (HasFrom) {
978964
Entry->foreachShadowPointerInfo([&](const ShadowPtrInfoTy &ShadowPtr) {
979-
const bool isZeroCopy = PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY;
980-
const bool isUSMMode =
981-
PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY;
982-
if (*ShadowPtr.HstPtrAddr == nullptr || isZeroCopy || isUSMMode)
983-
return OFFLOAD_SUCCESS;
984-
*ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal;
985-
DP("Restoring original host pointer value " DPxMOD " for host "
986-
"pointer " DPxMOD "\n",
987-
DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr));
965+
constexpr int64_t VoidPtrSize = sizeof(void *);
966+
if (ShadowPtr.PtrSize > VoidPtrSize) {
967+
DP("Restoring host descriptor " DPxMOD
968+
" to its original content (%" PRId64
969+
" bytes), containing pointee address " DPxMOD "\n",
970+
DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize,
971+
DPxPTR(ShadowPtr.HstPtrContent.data()));
972+
} else {
973+
DP("Restoring host pointer " DPxMOD " to its original value " DPxMOD
974+
"\n",
975+
DPxPTR(ShadowPtr.HstPtrAddr),
976+
DPxPTR(ShadowPtr.HstPtrContent.data()));
977+
}
978+
std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(),
979+
ShadowPtr.PtrSize);
988980
return OFFLOAD_SUCCESS;
989981
});
990982
}
@@ -1197,12 +1189,22 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
11971189
if (TPR.getEntry()) {
11981190
int Ret = TPR.getEntry()->foreachShadowPointerInfo(
11991191
[&](ShadowPtrInfoTy &ShadowPtr) {
1200-
DP("Restoring original target pointer value " DPxMOD " for target "
1201-
"pointer " DPxMOD "\n",
1202-
DPxPTR(ShadowPtr.TgtPtrVal), DPxPTR(ShadowPtr.TgtPtrAddr));
1192+
constexpr int64_t VoidPtrSize = sizeof(void *);
1193+
if (ShadowPtr.PtrSize > VoidPtrSize) {
1194+
DP("Restoring target descriptor " DPxMOD
1195+
" to its original content (%" PRId64
1196+
" bytes), containing pointee address " DPxMOD "\n",
1197+
DPxPTR(ShadowPtr.TgtPtrAddr), ShadowPtr.PtrSize,
1198+
DPxPTR(ShadowPtr.TgtPtrContent.data()));
1199+
} else {
1200+
DP("Restoring target pointer " DPxMOD
1201+
" to its original value " DPxMOD "\n",
1202+
DPxPTR(ShadowPtr.TgtPtrAddr),
1203+
DPxPTR(ShadowPtr.TgtPtrContent.data()));
1204+
}
12031205
Ret = Device.submitData(ShadowPtr.TgtPtrAddr,
1204-
(void *)&ShadowPtr.TgtPtrVal,
1205-
sizeof(void *), AsyncInfo);
1206+
ShadowPtr.TgtPtrContent.data(),
1207+
ShadowPtr.PtrSize, AsyncInfo);
12061208
if (Ret != OFFLOAD_SUCCESS) {
12071209
REPORT("Copying data to device failed.\n");
12081210
return OFFLOAD_FAIL;
@@ -1227,21 +1229,26 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
12271229
}
12281230

12291231
// Wait for device-to-host memcopies for whole struct to complete,
1230-
// before restoring the correct host pointer.
1232+
// before restoring the correct host pointer/descriptor.
12311233
if (auto *Entry = TPR.getEntry()) {
12321234
AsyncInfo.addPostProcessingFunction([=]() -> int {
12331235
int Ret = Entry->foreachShadowPointerInfo(
12341236
[&](const ShadowPtrInfoTy &ShadowPtr) {
1235-
const bool isZeroCopy =
1236-
PM->getRequirements() & OMPX_REQ_AUTO_ZERO_COPY;
1237-
const bool isUSMMode =
1238-
PM->getRequirements() & OMP_REQ_UNIFIED_SHARED_MEMORY;
1239-
if (*ShadowPtr.HstPtrAddr == nullptr || isZeroCopy || isUSMMode)
1240-
return OFFLOAD_SUCCESS;
1241-
*ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal;
1242-
DP("Restoring original host pointer value " DPxMOD
1243-
" for host pointer " DPxMOD "\n",
1244-
DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr));
1237+
constexpr int64_t VoidPtrSize = sizeof(void *);
1238+
if (ShadowPtr.PtrSize > VoidPtrSize) {
1239+
DP("Restoring host descriptor " DPxMOD
1240+
" to its original content (%" PRId64
1241+
" bytes), containing pointee address " DPxMOD "\n",
1242+
DPxPTR(ShadowPtr.HstPtrAddr), ShadowPtr.PtrSize,
1243+
DPxPTR(ShadowPtr.HstPtrContent.data()));
1244+
} else {
1245+
DP("Restoring host pointer " DPxMOD
1246+
" to its original value " DPxMOD "\n",
1247+
DPxPTR(ShadowPtr.HstPtrAddr),
1248+
DPxPTR(ShadowPtr.HstPtrContent.data()));
1249+
}
1250+
std::memcpy(ShadowPtr.HstPtrAddr, ShadowPtr.HstPtrContent.data(),
1251+
ShadowPtr.PtrSize);
12451252
return OFFLOAD_SUCCESS;
12461253
});
12471254
Entry->unlock();

0 commit comments

Comments
 (0)