@@ -333,23 +333,23 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
333333// / Utility function to perform a pointer attachment operation.
334334// /
335335// / For something like:
336- // / \code
336+ // / ```cpp
337337// / int *p;
338338// / ...
339339// / #pragma omp target enter data map(to:p[10:10])
340- // / \endcode
340+ // / ```
341341// /
342342// / for which the attachment operation gets represented using:
343- // / \code
343+ // / ```
344344// / &p, &p[10], sizeof(p), ATTACH
345- // / \endcode
345+ // / ```
346346// /
347347// / (Hst|Tgt)PtrAddr represents &p
348348// / (Hst|Tgt)PteeBase represents &p[0]
349349// / (Hst|Tgt)PteeBegin represents &p[10]
350350// /
351351// / This function first computes the expected TgtPteeBase using:
352- // / TgtPteeBase = TgtPteeBegin - (HstPteeBegin - HstPteeBase)
352+ // / `<Select> TgtPteeBase = TgtPteeBegin - (HstPteeBegin - HstPteeBase)`
353353// /
354354// / and then attaches TgtPteeBase to TgtPtrAddr.
355355// /
@@ -362,14 +362,14 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
362362// / information such as lower-bound/upper-bound etc in their subsequent fields.
363363// /
364364// / For example, for the following:
365- // / \code
365+ // / ```fortran
366366// / integer, allocatable :: x(:)
367367// / integer, pointer :: p(:)
368368// / ...
369369// / p => x(10: 19)
370370// / ...
371371// / !$omp target enter data map(to:p(:))
372- // / \endcode
372+ // / ```
373373// /
374374// / The map should trigger a pointer-attachment (assuming the pointer-attachment
375375// / conditions as noted on processAttachEntries are met) between the descriptor
@@ -384,10 +384,10 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg,
384384// /
385385// / The function also handles pointer-attachment portion of PTR_AND_OBJ maps,
386386// / like:
387- // / \code
387+ // / ```
388388// / &p, &p[10], 10 * sizeof(p[10]), PTR_AND_OBJ
389- // / \endcoe
390- // / by using " sizeof(void*)" as \p HstPtrSize.
389+ // / ```
390+ // / by using ` sizeof(void*)` as \p HstPtrSize.
391391static int performPointerAttachment (DeviceTy &Device, AsyncInfoTy &AsyncInfo,
392392 void **HstPtrAddr, void *HstPteeBase,
393393 void *HstPteeBegin, void **TgtPtrAddr,
@@ -405,8 +405,9 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
405405 // Add shadow pointer tracking
406406 // TODO: Support shadow-tracking of larger than VoidPtrSize pointers,
407407 // to support restoration of Fortran descriptors. Currently, this check
408- // would return false, even if the host Fortran descriptor was, and we
409- // should have done an update of the device descriptor. e.g.
408+ // would return false, even if the host Fortran descriptor had been
409+ // updated since its previous map, and we should have updated its
410+ // device counterpart. e.g.
410411 //
411412 // !$omp target enter data map(x(1:100)) ! (1)
412413 // p => x(10: 19)
@@ -472,30 +473,6 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
472473 DPxPTR (HstDescriptorFieldsAddr));
473474
474475 // Submit the entire buffer to device
475- // FIXME: When handling ATTACH map-type, pointer attachment needs to happen
476- // after the other mapping operations are done, to avoid possibility of
477- // pending transfers clobbering the attachment, for example:
478- //
479- // int *p = ...;
480- // int **pp = &p;
481- // map(to: pp[0], p[0])
482- //
483- // Which would be represented by:
484- // &pp[0], &pp[0], sizeof(pp[0]), TO (1)
485- // &p[0], &p[0], sizeof(p[0]), TO (2)
486- //
487- // &pp, &pp[0], sizeof(pp), ATTACH (3)
488- // &p, &p[0], sizeof(p), ATTACH (4)
489- //
490- // (4) and (1) are both trying to modify the device memory corresponding to
491- // &p. We need to ensure that (4) happens last.
492- //
493- // One possible solution to this could be to insert a "device barrier" before
494- // the first ATTACH submitData call, so that every subsequent submitData waits
495- // for any prior operations to finish. Like:
496- // Device.submitData(..., /*InOrder=*/IsFirstAttachEntry)
497- // Where the boolean InOrder being true means that this submission should
498- // wait for prior memory submissions to finish.
499476 int SubmitResult = Device.submitData (TgtPtrAddr, DataBuffer, HstPtrSize,
500477 AsyncInfo, PtrTPR.getEntry ());
501478
@@ -702,10 +679,10 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
702679// /
703680// / From OpenMP's perspective, when mapping something that has a base pointer,
704681// / such as:
705- // / \code
682+ // / ```cpp
706683// / int *p;
707684// / #pragma omp enter target data map(to: p[10:20])
708- // / \endcode
685+ // / ```
709686// /
710687// / a pointer-attachment between p and &p[10] should occur if both p and
711688// / p[10] are present on the device after doing all allocations for all maps
@@ -718,6 +695,33 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
718695// / That's why we collect all attach entries and new memory allocations during
719696// / targetDataBegin, and use that information to make the decision of whether
720697// / to perform a pointer-attachment or not here, after maps have been handled.
698+ // /
699+ // / Additionally, once we decide that a pointer-attachment should be performed,
700+ // / we need to make sure that it happens after any previously submitted data
701+ // / transfers have completed, to avoid the possibility of the pending transfers
702+ // / clobbering the attachment. For example:
703+ // /
704+ // / ```cpp
705+ // / int *p = ...;
706+ // / int **pp = &p;
707+ // / map(to: pp[0], p[0])
708+ // / ```
709+ // /
710+ // / Which would be represented by:
711+ // / ```
712+ // / &pp[0], &pp[0], sizeof(pp[0]), TO (1)
713+ // / &p[0], &p[0], sizeof(p[0]), TO (2)
714+ // /
715+ // / &pp, &pp[0], sizeof(pp), ATTACH (3)
716+ // / &p, &p[0], sizeof(p), ATTACH (4)
717+ // / ```
718+ // /
719+ // / (4) and (1) are both trying to modify the device memory corresponding to
720+ // / `&p`. So, if we decide that (4) should do an attachment, we also need to
721+ // / ensure that (4) happens after (1) is complete.
722+ // /
723+ // / For this purpose, we insert a data_fence before the first
724+ // / pointer-attachment, (3), to ensure that all pending transfers finish first.
721725int processAttachEntries (DeviceTy &Device, AttachInfoTy &AttachInfo,
722726 AsyncInfoTy &AsyncInfo) {
723727 // Report all tracked allocations from both main loop and ATTACH processing
@@ -736,6 +740,8 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
736740 DP (" Processing %zu deferred ATTACH map entries\n " ,
737741 AttachInfo.AttachEntries .size ());
738742
743+ int Ret = OFFLOAD_SUCCESS;
744+ bool IsFirstPointerAttachment = true ;
739745 for (size_t EntryIdx = 0 ; EntryIdx < AttachInfo.AttachEntries .size ();
740746 ++EntryIdx) {
741747 const auto &AttachEntry = AttachInfo.AttachEntries [EntryIdx];
@@ -825,10 +831,22 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
825831 continue ;
826832 void *TgtPteeBegin = PteeTPROpt->TargetPointer ;
827833
828- // Update the device pointer to point to device pointee.
829- int Ret = performPointerAttachment (Device, AsyncInfo, HstPtr, HstPteeBase,
830- HstPteeBegin, TgtPtrBase, TgtPteeBegin,
831- PtrSize, PtrTPR);
834+ // Insert a data-fence before the first pointer-attachment.
835+ if (IsFirstPointerAttachment) {
836+ IsFirstPointerAttachment = false ;
837+ DP (" Inserting a data fence before the first pointer attachment.\n " );
838+ Ret = Device.dataFence (AsyncInfo);
839+ if (Ret != OFFLOAD_SUCCESS) {
840+ REPORT (" Failed to insert data fence.\n " );
841+ return OFFLOAD_FAIL;
842+ }
843+ }
844+
845+ // Do the pointer-attachment, i.e. update the device pointer to point to
846+ // device pointee.
847+ Ret = performPointerAttachment (Device, AsyncInfo, HstPtr, HstPteeBase,
848+ HstPteeBegin, TgtPtrBase, TgtPteeBegin,
849+ PtrSize, PtrTPR);
832850 if (Ret != OFFLOAD_SUCCESS)
833851 return OFFLOAD_FAIL;
834852
0 commit comments