Skip to content

Commit 2806389

Browse files
committed
Insert a data-fence before the first pointer-attachment.
1 parent 2370215 commit 2806389

File tree

1 file changed

+60
-42
lines changed

1 file changed

+60
-42
lines changed

offload/libomptarget/omptarget.cpp

Lines changed: 60 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -333,23 +333,23 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
333333
/// Utility function to perform a pointer attachment operation.
334334
///
335335
/// For something like:
336-
/// \code
336+
/// ```cpp
337337
/// int *p;
338338
/// ...
339339
/// #pragma omp target enter data map(to:p[10:10])
340-
/// \endcode
340+
/// ```
341341
///
342342
/// for which the attachment operation gets represented using:
343-
/// \code
343+
/// ```
344344
/// &p, &p[10], sizeof(p), ATTACH
345-
/// \endcode
345+
/// ```
346346
///
347347
/// (Hst|Tgt)PtrAddr represents &p
348348
/// (Hst|Tgt)PteeBase represents &p[0]
349349
/// (Hst|Tgt)PteeBegin represents &p[10]
350350
///
351351
/// This function first computes the expected TgtPteeBase using:
352-
/// TgtPteeBase = TgtPteeBegin - (HstPteeBegin - HstPteeBase)
352+
/// `<Select>TgtPteeBase = TgtPteeBegin - (HstPteeBegin - HstPteeBase)`
353353
///
354354
/// and then attaches TgtPteeBase to TgtPtrAddr.
355355
///
@@ -362,14 +362,14 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
362362
/// information such as lower-bound/upper-bound etc in their subsequent fields.
363363
///
364364
/// For example, for the following:
365-
/// \code
365+
/// ```fortran
366366
/// integer, allocatable :: x(:)
367367
/// integer, pointer :: p(:)
368368
/// ...
369369
/// p => x(10: 19)
370370
/// ...
371371
/// !$omp target enter data map(to:p(:))
372-
/// \endcode
372+
/// ```
373373
///
374374
/// The map should trigger a pointer-attachment (assuming the pointer-attachment
375375
/// conditions as noted on processAttachEntries are met) between the descriptor
@@ -384,10 +384,10 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
384384
///
385385
/// The function also handles pointer-attachment portion of PTR_AND_OBJ maps,
386386
/// like:
387-
/// \code
387+
/// ```
388388
/// &p, &p[10], 10 * sizeof(p[10]), PTR_AND_OBJ
389-
/// \endcoe
390-
/// by using "sizeof(void*)" as \p HstPtrSize.
389+
/// ```
390+
/// by using `sizeof(void*)` as \p HstPtrSize.
391391
static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
392392
void **HstPtrAddr, void *HstPteeBase,
393393
void *HstPteeBegin, void **TgtPtrAddr,
@@ -405,8 +405,9 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
405405
// Add shadow pointer tracking
406406
// TODO: Support shadow-tracking of larger than VoidPtrSize pointers,
407407
// to support restoration of Fortran descriptors. Currently, this check
408-
// would return false, even if the host Fortran descriptor was, and we
409-
// should have done an update of the device descriptor. e.g.
408+
// would return false, even if the host Fortran descriptor had been
409+
// updated since its previous map, and we should have updated its
410+
// device counterpart. e.g.
410411
//
411412
// !$omp target enter data map(x(1:100)) ! (1)
412413
// p => x(10: 19)
@@ -472,30 +473,6 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
472473
DPxPTR(HstDescriptorFieldsAddr));
473474

474475
// Submit the entire buffer to device
475-
// FIXME: When handling ATTACH map-type, pointer attachment needs to happen
476-
// after the other mapping operations are done, to avoid possibility of
477-
// pending transfers clobbering the attachment, for example:
478-
//
479-
// int *p = ...;
480-
// int **pp = &p;
481-
// map(to: pp[0], p[0])
482-
//
483-
// Which would be represented by:
484-
// &pp[0], &pp[0], sizeof(pp[0]), TO (1)
485-
// &p[0], &p[0], sizeof(p[0]), TO (2)
486-
//
487-
// &pp, &pp[0], sizeof(pp), ATTACH (3)
488-
// &p, &p[0], sizeof(p), ATTACH (4)
489-
//
490-
// (4) and (1) are both trying to modify the device memory corresponding to
491-
// &p. We need to ensure that (4) happens last.
492-
//
493-
// One possible solution to this could be to insert a "device barrier" before
494-
// the first ATTACH submitData call, so that every subsequent submitData waits
495-
// for any prior operations to finish. Like:
496-
// Device.submitData(..., /*InOrder=*/IsFirstAttachEntry)
497-
// Where the boolean InOrder being true means that this submission should
498-
// wait for prior memory submissions to finish.
499476
int SubmitResult = Device.submitData(TgtPtrAddr, DataBuffer, HstPtrSize,
500477
AsyncInfo, PtrTPR.getEntry());
501478

@@ -702,10 +679,10 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
702679
///
703680
/// From OpenMP's perspective, when mapping something that has a base pointer,
704681
/// such as:
705-
/// \code
682+
/// ```cpp
706683
/// int *p;
707684
/// #pragma omp enter target data map(to: p[10:20])
708-
/// \endcode
685+
/// ```
709686
///
710687
/// a pointer-attachment between p and &p[10] should occur if both p and
711688
/// p[10] are present on the device after doing all allocations for all maps
@@ -718,6 +695,33 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
718695
/// That's why we collect all attach entries and new memory allocations during
719696
/// targetDataBegin, and use that information to make the decision of whether
720697
/// to perform a pointer-attachment or not here, after maps have been handled.
698+
///
699+
/// Additionally, once we decide that a pointer-attachment should be performed,
700+
/// we need to make sure that it happens after any previously submitted data
701+
/// transfers have completed, to avoid the possibility of the pending transfers
702+
/// clobbering the attachment. For example:
703+
///
704+
/// ```cpp
705+
/// int *p = ...;
706+
/// int **pp = &p;
707+
/// map(to: pp[0], p[0])
708+
/// ```
709+
///
710+
/// Which would be represented by:
711+
/// ```
712+
/// &pp[0], &pp[0], sizeof(pp[0]), TO (1)
713+
/// &p[0], &p[0], sizeof(p[0]), TO (2)
714+
///
715+
/// &pp, &pp[0], sizeof(pp), ATTACH (3)
716+
/// &p, &p[0], sizeof(p), ATTACH (4)
717+
/// ```
718+
///
719+
/// (4) and (1) are both trying to modify the device memory corresponding to
720+
/// `&p`. So, if we decide that (4) should do an attachment, we also need to
721+
/// ensure that (4) happens after (1) is complete.
722+
///
723+
/// For this purpose, we insert a data_fence before the first
724+
/// pointer-attachment, (3), to ensure that all pending transfers finish first.
721725
int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
722726
AsyncInfoTy &AsyncInfo) {
723727
// Report all tracked allocations from both main loop and ATTACH processing
@@ -736,6 +740,8 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
736740
DP("Processing %zu deferred ATTACH map entries\n",
737741
AttachInfo.AttachEntries.size());
738742

743+
int Ret = OFFLOAD_SUCCESS;
744+
bool IsFirstPointerAttachment = true;
739745
for (size_t EntryIdx = 0; EntryIdx < AttachInfo.AttachEntries.size();
740746
++EntryIdx) {
741747
const auto &AttachEntry = AttachInfo.AttachEntries[EntryIdx];
@@ -825,10 +831,22 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
825831
continue;
826832
void *TgtPteeBegin = PteeTPROpt->TargetPointer;
827833

828-
// Update the device pointer to point to device pointee.
829-
int Ret = performPointerAttachment(Device, AsyncInfo, HstPtr, HstPteeBase,
830-
HstPteeBegin, TgtPtrBase, TgtPteeBegin,
831-
PtrSize, PtrTPR);
834+
// Insert a data-fence before the first pointer-attachment.
835+
if (IsFirstPointerAttachment) {
836+
IsFirstPointerAttachment = false;
837+
DP("Inserting a data fence before the first pointer attachment.\n");
838+
Ret = Device.dataFence(AsyncInfo);
839+
if (Ret != OFFLOAD_SUCCESS) {
840+
REPORT("Failed to insert data fence.\n");
841+
return OFFLOAD_FAIL;
842+
}
843+
}
844+
845+
// Do the pointer-attachment, i.e. update the device pointer to point to
846+
// device pointee.
847+
Ret = performPointerAttachment(Device, AsyncInfo, HstPtr, HstPteeBase,
848+
HstPteeBegin, TgtPtrBase, TgtPteeBegin,
849+
PtrSize, PtrTPR);
832850
if (Ret != OFFLOAD_SUCCESS)
833851
return OFFLOAD_FAIL;
834852

0 commit comments

Comments
 (0)