@@ -407,7 +407,7 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
407
407
assert (PtrTPR.getEntry () &&
408
408
" Need a valid pointer entry to perform pointer-attachment" );
409
409
410
- constexpr int64_t VoidPtrSize = sizeof (void *);
410
+ int64_t VoidPtrSize = sizeof (void *);
411
411
assert (HstPtrSize >= VoidPtrSize && " PointerSize is too small" );
412
412
413
413
uint64_t Delta = reinterpret_cast <uint64_t >(HstPteeBegin) -
@@ -422,8 +422,23 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
422
422
DPxPTR (TgtPteeBase), DPxPTR (TgtPteeBegin));
423
423
424
424
// Add shadow pointer tracking
425
+ // TODO: Support shadow-tracking of larger than VoidPtrSize pointers,
426
+ // to support restoration of Fortran descriptors. Currently, this check
427
+ // would return false, even if the host Fortran descriptor had been
428
+ // updated since its previous map, and we should have updated its
429
+ // device counterpart. e.g.
430
+ //
431
+ // !$omp target enter data map(x(1:100)) ! (1)
432
+ // p => x(10: 19)
433
+ // !$omp target enter data map(p, p(:)) ! (2)
434
+ // p => x(5: 9)
435
+ // !$omp target enter data map(attach(always): p(:)) ! (3)
436
+ //
437
+ // While PtrAddr(&desc_p) and PteeBase(&p(1)) are the same for (2) and (3), the
438
+ // pointer attachment for (3) needs to update the bounds information
439
+ // in the descriptor of p on device.
425
440
if (!PtrTPR.getEntry ()->addShadowPointer (
426
- ShadowPtrInfoTy{HstPtrAddr, TgtPtrAddr, TgtPteeBase, HstPtrSize })) {
441
+ ShadowPtrInfoTy{HstPtrAddr, HstPteeBase, TgtPtrAddr, TgtPteeBase })) {
427
442
DP (" Pointer " DPxMOD " is already attached to " DPxMOD " \n " ,
428
443
DPxPTR (TgtPtrAddr), DPxPTR (TgtPteeBase));
429
444
return OFFLOAD_SUCCESS;
@@ -954,29 +969,22 @@ postProcessingTargetDataEnd(DeviceTy *Device,
954
969
DelEntry = false ;
955
970
}
956
971
957
- // If we copied back to the host a struct/array containing pointers, or
958
- // Fortran descriptors (which are larger than a "void *"), we need to
959
- // restore the original host pointer/descriptor values from their shadow
960
- // copies. If the struct is going to be deallocated, remove any remaining
961
- // shadow pointer entries for this struct.
972
+ // If we copied back to the host a struct/array containing pointers,
973
+ // we need to restore the original host pointer values from their
974
+ // shadow copies. If the struct is going to be deallocated, remove any
975
+ // remaining shadow pointer entries for this struct.
962
976
const bool HasFrom = ArgType & OMP_TGT_MAPTYPE_FROM;
963
977
if (HasFrom) {
964
978
Entry->foreachShadowPointerInfo ([&](const ShadowPtrInfoTy &ShadowPtr) {
965
- constexpr int64_t VoidPtrSize = sizeof (void *);
966
- if (ShadowPtr.PtrSize > VoidPtrSize) {
967
- DP (" Restoring host descriptor " DPxMOD
968
- " to its original content (%" PRId64
969
- " bytes), containing pointee address " DPxMOD " \n " ,
970
- DPxPTR (ShadowPtr.HstPtrAddr ), ShadowPtr.PtrSize ,
971
- DPxPTR (ShadowPtr.HstPtrContent .data ()));
972
- } else {
973
- DP (" Restoring host pointer " DPxMOD " to its original value " DPxMOD
974
- " \n " ,
975
- DPxPTR (ShadowPtr.HstPtrAddr ),
976
- DPxPTR (ShadowPtr.HstPtrContent .data ()));
977
- }
978
- std::memcpy (ShadowPtr.HstPtrAddr , ShadowPtr.HstPtrContent .data (),
979
- ShadowPtr.PtrSize );
979
+ const bool isZeroCopy = PM->getRequirements () & OMPX_REQ_AUTO_ZERO_COPY;
980
+ const bool isUSMMode =
981
+ PM->getRequirements () & OMP_REQ_UNIFIED_SHARED_MEMORY;
982
+ if (*ShadowPtr.HstPtrAddr == nullptr || isZeroCopy || isUSMMode)
983
+ return OFFLOAD_SUCCESS;
984
+ *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal ;
985
+ DP (" Restoring original host pointer value " DPxMOD " for host "
986
+ " pointer " DPxMOD " \n " ,
987
+ DPxPTR (ShadowPtr.HstPtrVal ), DPxPTR (ShadowPtr.HstPtrAddr ));
980
988
return OFFLOAD_SUCCESS;
981
989
});
982
990
}
@@ -1189,22 +1197,12 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
1189
1197
if (TPR.getEntry ()) {
1190
1198
int Ret = TPR.getEntry ()->foreachShadowPointerInfo (
1191
1199
[&](ShadowPtrInfoTy &ShadowPtr) {
1192
- constexpr int64_t VoidPtrSize = sizeof (void *);
1193
- if (ShadowPtr.PtrSize > VoidPtrSize) {
1194
- DP (" Restoring target descriptor " DPxMOD
1195
- " to its original content (%" PRId64
1196
- " bytes), containing pointee address " DPxMOD " \n " ,
1197
- DPxPTR (ShadowPtr.TgtPtrAddr ), ShadowPtr.PtrSize ,
1198
- DPxPTR (ShadowPtr.TgtPtrContent .data ()));
1199
- } else {
1200
- DP (" Restoring target pointer " DPxMOD
1201
- " to its original value " DPxMOD " \n " ,
1202
- DPxPTR (ShadowPtr.TgtPtrAddr ),
1203
- DPxPTR (ShadowPtr.TgtPtrContent .data ()));
1204
- }
1200
+ DP (" Restoring original target pointer value " DPxMOD " for target "
1201
+ " pointer " DPxMOD " \n " ,
1202
+ DPxPTR (ShadowPtr.TgtPtrVal ), DPxPTR (ShadowPtr.TgtPtrAddr ));
1205
1203
Ret = Device.submitData (ShadowPtr.TgtPtrAddr ,
1206
- ShadowPtr. TgtPtrContent . data () ,
1207
- ShadowPtr. PtrSize , AsyncInfo);
1204
+ ( void *)&ShadowPtr. TgtPtrVal ,
1205
+ sizeof ( void *) , AsyncInfo);
1208
1206
if (Ret != OFFLOAD_SUCCESS) {
1209
1207
REPORT (" Copying data to device failed.\n " );
1210
1208
return OFFLOAD_FAIL;
@@ -1229,26 +1227,21 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase,
1229
1227
}
1230
1228
1231
1229
// Wait for device-to-host memcopies for whole struct to complete,
1232
- // before restoring the correct host pointer/descriptor .
1230
+ // before restoring the correct host pointer.
1233
1231
if (auto *Entry = TPR.getEntry ()) {
1234
1232
AsyncInfo.addPostProcessingFunction ([=]() -> int {
1235
1233
int Ret = Entry->foreachShadowPointerInfo (
1236
1234
[&](const ShadowPtrInfoTy &ShadowPtr) {
1237
- constexpr int64_t VoidPtrSize = sizeof (void *);
1238
- if (ShadowPtr.PtrSize > VoidPtrSize) {
1239
- DP (" Restoring host descriptor " DPxMOD
1240
- " to its original content (%" PRId64
1241
- " bytes), containing pointee address " DPxMOD " \n " ,
1242
- DPxPTR (ShadowPtr.HstPtrAddr ), ShadowPtr.PtrSize ,
1243
- DPxPTR (ShadowPtr.HstPtrContent .data ()));
1244
- } else {
1245
- DP (" Restoring host pointer " DPxMOD
1246
- " to its original value " DPxMOD " \n " ,
1247
- DPxPTR (ShadowPtr.HstPtrAddr ),
1248
- DPxPTR (ShadowPtr.HstPtrContent .data ()));
1249
- }
1250
- std::memcpy (ShadowPtr.HstPtrAddr , ShadowPtr.HstPtrContent .data (),
1251
- ShadowPtr.PtrSize );
1235
+ const bool isZeroCopy =
1236
+ PM->getRequirements () & OMPX_REQ_AUTO_ZERO_COPY;
1237
+ const bool isUSMMode =
1238
+ PM->getRequirements () & OMP_REQ_UNIFIED_SHARED_MEMORY;
1239
+ if (*ShadowPtr.HstPtrAddr == nullptr || isZeroCopy || isUSMMode)
1240
+ return OFFLOAD_SUCCESS;
1241
+ *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal ;
1242
+ DP (" Restoring original host pointer value " DPxMOD
1243
+ " for host pointer " DPxMOD " \n " ,
1244
+ DPxPTR (ShadowPtr.HstPtrVal ), DPxPTR (ShadowPtr.HstPtrAddr ));
1252
1245
return OFFLOAD_SUCCESS;
1253
1246
});
1254
1247
Entry->unlock ();
0 commit comments