diff --git a/offload/include/OpenMP/Mapping.h b/offload/include/OpenMP/Mapping.h index 45bd9c6e7da8b..0ec85b9dea344 100644 --- a/offload/include/OpenMP/Mapping.h +++ b/offload/include/OpenMP/Mapping.h @@ -484,20 +484,35 @@ struct AttachMapInfo { MapType(Type), Pointername(Name) {} }; -/// Structure to track ATTACH entries and new allocations across recursive calls -/// (for handling mappers) to targetDataBegin for a given construct. -struct AttachInfoTy { - /// ATTACH map entries for deferred processing. +/// Structure to track new allocations, ATTACH entries, DELETE entries and +/// skipped FROM data transfer information for a given construct, across +/// recursive calls (for handling mappers) to targetDataBegin/targetDataEnd. +struct StateInfoTy { + /// ATTACH map entries for deferred processing until all other maps are done. llvm::SmallVector AttachEntries; + /// Host pointers for which new memory was allocated. /// Key: host pointer, Value: allocation size. llvm::DenseMap NewAllocations; - AttachInfoTy() = default; + /// Host pointers that had a FROM entry, but for which a data transfer was + /// skipped due to the ref-count not being zero. + /// Key: host pointer, Value: data size. + llvm::DenseMap SkippedFromEntries; + + /// Host pointers for which we have attempted a FROM transfer at some point + /// during targetDataEnd. Used to avoid duplicate transfers. + llvm::SmallSet TransferredFromPtrs; + + /// Host pointers for which a DELETE entry was encountered, causing their + /// ref-count to have gone down to zero. + llvm::SmallSet MarkedForDeletionPtrs; + + StateInfoTy() = default; // Delete copy constructor and copy assignment operator to prevent copying - AttachInfoTy(const AttachInfoTy &) = delete; - AttachInfoTy &operator=(const AttachInfoTy &) = delete; + StateInfoTy(const StateInfoTy &) = delete; + StateInfoTy &operator=(const StateInfoTy &) = delete; }; // Function pointer type for targetData* functions (targetDataBegin, @@ -505,7 +520,7 @@ struct AttachInfoTy { typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **, void **, int64_t *, int64_t *, map_var_info_t *, void **, AsyncInfoTy &, - AttachInfoTy *, bool); + StateInfoTy *, bool); void dumpTargetPointerMappings(const ident_t *Loc, DeviceTy &Device, bool toStdOut = false); @@ -514,24 +529,22 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, - AttachInfoTy *AttachInfo = nullptr, - bool FromMapper = false); + StateInfoTy *StateInfo = nullptr, bool FromMapper = false); int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, - AttachInfoTy *AttachInfo = nullptr, bool FromMapper = false); + StateInfoTy *StateInfo = nullptr, bool FromMapper = false); int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, - AttachInfoTy *AttachInfo = nullptr, - bool FromMapper = false); + StateInfoTy *StateInfo = nullptr, bool FromMapper = false); // Process deferred ATTACH map entries collected during targetDataBegin. -int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, +int processAttachEntries(DeviceTy &Device, StateInfoTy &StateInfo, AsyncInfoTy &AsyncInfo); struct MappingInfoTy { @@ -572,7 +585,7 @@ struct MappingInfoTy { bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr, - bool ReleaseHDTTMap = true); + bool ReleaseHDTTMap = true, StateInfoTy *StateInfo = nullptr); /// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor /// ensures exclusive access to the HDTT map. diff --git a/offload/libomptarget/OpenMP/Mapping.cpp b/offload/libomptarget/OpenMP/Mapping.cpp index 9b3533895f2a6..2286c422d41f2 100644 --- a/offload/libomptarget/OpenMP/Mapping.cpp +++ b/offload/libomptarget/OpenMP/Mapping.cpp @@ -202,7 +202,8 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer( int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier, - AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) { + AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap, + StateInfoTy *StateInfo) { LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size, OwnedTPR); LR.TPR.Flags.IsPresent = true; @@ -322,10 +323,28 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer( if (ReleaseHDTTMap) HDTTMap.destroy(); - // If the target pointer is valid, and we need to transfer data, issue the - // data transfer. + // Lambda to check if this pointer was newly allocated on the current region. + // This is needed to handle cases when the TO entry is encounter after an + // alloc entry for the same pointer, which increased the ref-count from 0 to + // 1, has already been encountered before. But because the ref-count was + // already 1 when TO was encountered, it wouldn't incur a transfer. e.g. + // ... map(alloc: x) map(to: x). + auto WasNewlyAllocatedForCurrentRegion = [&]() { + if (!StateInfo) + return false; + bool IsNewlyAllocated = StateInfo->NewAllocations.contains(HstPtrBegin); + if (IsNewlyAllocated) + DP("HstPtrBegin " DPxMOD " was newly allocated for the current region\n", + DPxPTR(HstPtrBegin)); + return IsNewlyAllocated; + }; + + // Even if this isn't a new entry, we still need to do a data-transfer if + // the pointer was newly allocated on the current target region. if (LR.TPR.TargetPointer && !LR.TPR.Flags.IsHostPointer && HasFlagTo && - (LR.TPR.Flags.IsNewEntry || HasFlagAlways) && Size != 0) { + (LR.TPR.Flags.IsNewEntry || HasFlagAlways || + WasNewlyAllocatedForCurrentRegion()) && + Size != 0) { // If we have something like: // #pragma omp target map(to: s.myarr[0:10]) map(to: s.myarr[0:10]) diff --git a/offload/libomptarget/interface.cpp b/offload/libomptarget/interface.cpp index fe18289765906..ac03546860740 100644 --- a/offload/libomptarget/interface.cpp +++ b/offload/libomptarget/interface.cpp @@ -167,19 +167,22 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase, int Rc = OFFLOAD_SUCCESS; - // Only allocate AttachInfo for targetDataBegin - std::unique_ptr AttachInfo; - if (TargetDataFunction == targetDataBegin) - AttachInfo = std::make_unique(); + // Allocate StateInfo for targetDataBegin and targetDataEnd to track + // allocations, pointer attachments and deferred transfers. + // This is not needed for targetDataUpdate. + std::unique_ptr StateInfo; + if (TargetDataFunction == targetDataBegin || + TargetDataFunction == targetDataEnd) + StateInfo = std::make_unique(); Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo, - AttachInfo.get(), /*FromMapper=*/false); + StateInfo.get(), /*FromMapper=*/false); if (Rc == OFFLOAD_SUCCESS) { // Process deferred ATTACH entries BEFORE synchronization - if (AttachInfo && !AttachInfo->AttachEntries.empty()) - Rc = processAttachEntries(*DeviceOrErr, *AttachInfo, AsyncInfo); + if (StateInfo && !StateInfo->AttachEntries.empty()) + Rc = processAttachEntries(*DeviceOrErr, *StateInfo, AsyncInfo); if (Rc == OFFLOAD_SUCCESS) Rc = AsyncInfo.synchronize(); diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index 69725e77bae00..e5161699ad337 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -294,7 +294,7 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg, int64_t ArgSize, int64_t ArgType, map_var_info_t ArgNames, void *ArgMapper, AsyncInfoTy &AsyncInfo, TargetDataFuncPtrTy TargetDataFunction, - AttachInfoTy *AttachInfo = nullptr) { + StateInfoTy *StateInfo = nullptr) { DP("Calling the mapper function " DPxMOD "\n", DPxPTR(ArgMapper)); // The mapper function fills up Components. @@ -325,7 +325,7 @@ int targetDataMapper(ident_t *Loc, DeviceTy &Device, void *ArgBase, void *Arg, MapperArgsBase.data(), MapperArgs.data(), MapperArgSizes.data(), MapperArgTypes.data(), MapperArgNames.data(), /*arg_mappers*/ nullptr, - AsyncInfo, AttachInfo, /*FromMapper=*/true); + AsyncInfo, StateInfo, /*FromMapper=*/true); return Rc; } @@ -509,12 +509,12 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, - AttachInfoTy *AttachInfo, bool FromMapper) { - assert(AttachInfo && "AttachInfo must be available for targetDataBegin for " - "handling ATTACH map-types."); + StateInfoTy *StateInfo, bool FromMapper) { + assert(StateInfo && "StateInfo must be available for targetDataBegin for " + "handling ATTACH and TO/TOFROM map-types."); // process each input. for (int32_t I = 0; I < ArgNum; ++I) { - // Ignore private variables and arrays - there is no mapping for them. + // Ignore private variables and arrays - there is no mapping for t.attahem. if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) || (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE)) continue; @@ -529,7 +529,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I]; int Rc = targetDataMapper(Loc, Device, ArgsBase[I], Args[I], ArgSizes[I], ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo, - targetDataBegin, AttachInfo); + targetDataBegin, StateInfo); if (Rc != OFFLOAD_SUCCESS) { REPORT("Call to targetDataBegin via targetDataMapper for custom mapper" @@ -556,7 +556,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // similar to firstprivate (PRIVATE | TO) entries by // PrivateArgumentManager. if (!IsCorrespondingPointerInit) - AttachInfo->AttachEntries.emplace_back( + StateInfo->AttachEntries.emplace_back( /*PointerBase=*/HstPtrBase, /*PointeeBegin=*/HstPtrBegin, /*PointerSize=*/DataSize, /*MapType=*/ArgTypes[I], /*PointeeName=*/HstPtrName); @@ -633,7 +633,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // Track new allocation, for eventual use in attachment decision-making. if (PointerTpr.Flags.IsNewEntry && !IsHostPtr) - AttachInfo->NewAllocations[HstPtrBase] = sizeof(void *); + StateInfo->NewAllocations[HstPtrBase] = sizeof(void *); DP("There are %zu bytes allocated at target address " DPxMOD " - is%s new" "\n", @@ -654,7 +654,8 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, auto TPR = Device.getMappingInfo().getTargetPointer( HDTTMap, HstPtrBegin, HstPtrBase, TgtPadding, DataSize, HstPtrName, HasFlagTo, HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier, - HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry()); + HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry(), + /*ReleaseHDTTMap=*/true, StateInfo); void *TgtPtrBegin = TPR.TargetPointer; IsHostPtr = TPR.Flags.IsHostPointer; // If data_size==0, then the argument could be a zero-length pointer to @@ -664,11 +665,30 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, HasPresentModifier ? "'present' map type modifier" : "device failure or illegal mapping"); return OFFLOAD_FAIL; + } else if (TgtPtrBegin && HasPresentModifier && + StateInfo->NewAllocations.contains(HstPtrBegin)) { + // For "PRESENT" entries, we may have cases like the following: + // map(alloc: p[0]) map(present, alloc: p[0]) + // If the compiler does not merge these entries, then the "PRESENT" entry + // may be encountered after a previous entry allocated new storage for it. + // To catch such cases, we should also look at any existing allocations + // and error out if we have one matching the pointer. We don't need to + // worry about cases like: + // map(alloc: p[1:10]) map(present, alloc: p[2:5]) + // as the list-items share storage, but are not identical, which is a + // user error as per OpenMP. + MESSAGE("device mapping required by 'present' map type modifier does not " + "exist for host address " DPxMOD " (%" PRId64 " bytes)\n", + DPxPTR(HstPtrBegin), DataSize); + REPORT("Pointer " DPxMOD + " was not present on the device upon entry to the region.\n", + DPxPTR(HstPtrBegin)); + return OFFLOAD_FAIL; } - // Track new allocation, for eventual use in attachment decision-making. + // Track new allocation, for eventual use in attachment/to decision-making. if (TPR.Flags.IsNewEntry && !IsHostPtr && TgtPtrBegin) - AttachInfo->NewAllocations[HstPtrBegin] = DataSize; + StateInfo->NewAllocations[HstPtrBegin] = DataSize; DP("There are %" PRId64 " bytes allocated at target address " DPxMOD " - is%s new\n", @@ -751,29 +771,29 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, /// /// For this purpose, we insert a data_fence before the first /// pointer-attachment, (3), to ensure that all pending transfers finish first. -int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, +int processAttachEntries(DeviceTy &Device, StateInfoTy &StateInfo, AsyncInfoTy &AsyncInfo) { // Report all tracked allocations from both main loop and ATTACH processing - if (!AttachInfo.NewAllocations.empty()) { + if (!StateInfo.NewAllocations.empty()) { DP("Tracked %u total new allocations:\n", - (unsigned)AttachInfo.NewAllocations.size()); - for ([[maybe_unused]] const auto &Alloc : AttachInfo.NewAllocations) { + (unsigned)StateInfo.NewAllocations.size()); + for ([[maybe_unused]] const auto &Alloc : StateInfo.NewAllocations) { DP(" Host ptr: " DPxMOD ", Size: %" PRId64 " bytes\n", DPxPTR(Alloc.first), Alloc.second); } } - if (AttachInfo.AttachEntries.empty()) + if (StateInfo.AttachEntries.empty()) return OFFLOAD_SUCCESS; DP("Processing %zu deferred ATTACH map entries\n", - AttachInfo.AttachEntries.size()); + StateInfo.AttachEntries.size()); int Ret = OFFLOAD_SUCCESS; bool IsFirstPointerAttachment = true; - for (size_t EntryIdx = 0; EntryIdx < AttachInfo.AttachEntries.size(); + for (size_t EntryIdx = 0; EntryIdx < StateInfo.AttachEntries.size(); ++EntryIdx) { - const auto &AttachEntry = AttachInfo.AttachEntries[EntryIdx]; + const auto &AttachEntry = StateInfo.AttachEntries[EntryIdx]; void **HstPtr = reinterpret_cast(AttachEntry.PointerBase); @@ -792,7 +812,7 @@ int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo, // Lambda to check if a pointer was newly allocated auto WasNewlyAllocated = [&](void *Ptr, const char *PtrName) { bool IsNewlyAllocated = - llvm::any_of(AttachInfo.NewAllocations, [&](const auto &Alloc) { + llvm::any_of(StateInfo.NewAllocations, [&](const auto &Alloc) { void *AllocPtr = Alloc.first; int64_t AllocSize = Alloc.second; return Ptr >= AllocPtr && @@ -1009,7 +1029,9 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgBases, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, - AttachInfoTy *AttachInfo, bool FromMapper) { + StateInfoTy *StateInfo, bool FromMapper) { + assert(StateInfo && "StateInfo is required for targetDataEnd for handling " + "FROM data transfers"); int Ret = OFFLOAD_SUCCESS; auto *PostProcessingPtrs = new SmallVector(); // process each input. @@ -1037,7 +1059,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, map_var_info_t ArgName = (!ArgNames) ? nullptr : ArgNames[I]; Ret = targetDataMapper(Loc, Device, ArgBases[I], Args[I], ArgSizes[I], ArgTypes[I], ArgName, ArgMappers[I], AsyncInfo, - targetDataEnd); + targetDataEnd, StateInfo); if (Ret != OFFLOAD_SUCCESS) { REPORT("Call to targetDataEnd via targetDataMapper for custom mapper" @@ -1064,6 +1086,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, HstPtrBegin, DataSize, UpdateRef, HasHoldModifier, !IsImplicit, ForceDelete, /*FromDataEnd=*/true); void *TgtPtrBegin = TPR.TargetPointer; + if (!TPR.isPresent() && !TPR.isHostPointer() && (DataSize || HasPresentModifier)) { DP("Mapping does not exist (%s)\n", @@ -1103,15 +1126,76 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, if (!TPR.isPresent()) continue; + // Track force-deleted pointers so we can use this information if we + // encounter FROM entries for the same pointer later on. + if (ForceDelete && TPR.Flags.IsLast) + StateInfo->MarkedForDeletionPtrs.insert(HstPtrBegin); + // Move data back to the host const bool HasAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM; - if (HasFrom && (HasAlways || TPR.Flags.IsLast) && - !TPR.Flags.IsHostPointer && DataSize != 0) { + const bool IsMemberOf = ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF; + int64_t TransferSize = DataSize; // Size for FROM data-transfer. + + // Lambda to check if there was a previously skipped FROM for this pointer + // due to its ref-count not being zero. Updates TransferSize if found. + auto HasSkippedMapFrom = [&]() -> bool { + auto It = StateInfo->SkippedFromEntries.find(HstPtrBegin); + if (It == StateInfo->SkippedFromEntries.end()) + return false; + DP("Found previously skipped FROM transfer for HstPtr=" DPxMOD + ", with size " + "%" PRId64 "\n", + DPxPTR(HstPtrBegin), It->second); + TransferSize = It->second; + StateInfo->SkippedFromEntries.erase(It); + return true; + }; + + // Lambda to check if this pointer was previously marked for deletion. + // Such a pointer would have had "IsLast" set to true when its DELETE entry + // was processed. So, the flag wouldn't be set for any FROM entries seen + // later on. + // This is needed to handle cases like the following: + // p1 = p2 = &x; + // ... map(delete: p1[:]) map(from: p2[0:1]) + // The ref-count becomes zero before encountering the FROM entry, but we + // still need to do a transfer, if it went from non-zero to zero. + auto WasPreviouslyMarkedForDeletion = [&]() -> bool { + if (!StateInfo->MarkedForDeletionPtrs.contains(HstPtrBegin)) + return false; + DP("Pointer HstPtr=" DPxMOD " was previously marked for deletion\n", + DPxPTR(HstPtrBegin)); + return true; + }; + + bool FromCopyBackAlreadyDone = + StateInfo->TransferredFromPtrs.contains(HstPtrBegin); + bool IsMapFromOnNonHostNonZeroData = + HasFrom && !TPR.Flags.IsHostPointer && DataSize != 0; + auto IsLastOrHasAlwaysOrWasForceDeleted = [&]() { + return TPR.Flags.IsLast || HasAlways || WasPreviouslyMarkedForDeletion(); + }; + + if (!FromCopyBackAlreadyDone && + ((IsMapFromOnNonHostNonZeroData && + IsLastOrHasAlwaysOrWasForceDeleted()) || + // Even if we're not looking at an entry with FROM map-type, if there + // were any previously skipped FROM transfers for this pointer, we + // should do them when the ref-count goes down to zero. + (TPR.Flags.IsLast && HasSkippedMapFrom()))) { + // Track that we're doing a FROM transfer for this pointer + // NOTE: If we don't care about the case of multiple different maps with + // from, always, or multiple map(from)s seen after a map(delete), e.g. + // ... map(always, from: x) map(always, from: x) + // ... map(delete: x) map(from: x) map(from: x) + // Then we can forego tacking TransferredFromPtrs. + StateInfo->TransferredFromPtrs.insert(HstPtrBegin); + DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", - DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); + TransferSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); TIMESCOPE_WITH_DETAILS_AND_IDENT( - "DevToHost", "Size=" + std::to_string(DataSize) + "B", Loc); + "DevToHost", "Size=" + std::to_string(TransferSize) + "B", Loc); // Wait for any previous transfer if an event is present. if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { @@ -1120,8 +1204,8 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, } } - Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo, - TPR.getEntry()); + Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, TransferSize, + AsyncInfo, TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data from device failed.\n"); return OFFLOAD_FAIL; @@ -1137,6 +1221,24 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, OFFLOAD_SUCCESS) return OFFLOAD_FAIL; } + } else if (!FromCopyBackAlreadyDone && IsMapFromOnNonHostNonZeroData && + !IsLastOrHasAlwaysOrWasForceDeleted() && !IsMemberOf) { + // We can have cases like the following: + // ... map(storage: p1[0:1]) map(from: p1[0:1]) + // + // where it's possible that when the FROM entry is processed, the + // ref count is not zero, so no data transfer happens for it. But + // the ref-count can go down to zero by the end of the directive + // in which case a transfer should happen. + // + // So, we keep track of any skipped FROM data-transfers, in case + // the ref-count goes down to zero later on. + // + // This should be limited to non-member-of entries because for member-of, + // their ref-count should go down only once as part of the parent. + StateInfo->SkippedFromEntries[HstPtrBegin] = DataSize; + DP("Skipping FROM map transfer for HstPtr=" DPxMOD ", Size=%" PRId64 "\n", + DPxPTR(HstPtrBegin), DataSize); } // Add pointer to the buffer for post-synchronize processing. @@ -1315,7 +1417,7 @@ int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, void **ArgsBase, void **Args, int64_t *ArgSizes, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, - AttachInfoTy *AttachInfo, bool FromMapper) { + StateInfoTy *StateInfo, bool FromMapper) { // process each input. for (int32_t I = 0; I < ArgNum; ++I) { if ((ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) || @@ -1806,21 +1908,21 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, if (!DeviceOrErr) FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str()); - // Create AttachInfo for tracking any ATTACH entries, or new-allocations + // Create StateInfo for tracking any ATTACH entries, new allocations, // when handling the "begin" mapping for a target constructs. - AttachInfoTy AttachInfo; + StateInfoTy StateInfo; int Ret = targetDataBegin(Loc, *DeviceOrErr, ArgNum, ArgBases, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo, - &AttachInfo, false /*FromMapper=*/); + &StateInfo, false /*FromMapper=*/); if (Ret != OFFLOAD_SUCCESS) { REPORT("Call to targetDataBegin failed, abort target.\n"); return OFFLOAD_FAIL; } // Process collected ATTACH entries - if (!AttachInfo.AttachEntries.empty()) { - Ret = processAttachEntries(*DeviceOrErr, AttachInfo, AsyncInfo); + if (!StateInfo.AttachEntries.empty()) { + Ret = processAttachEntries(*DeviceOrErr, StateInfo, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { REPORT("Failed to process ATTACH entries.\n"); return OFFLOAD_FAIL; @@ -1987,9 +2089,14 @@ static int processDataAfter(ident_t *Loc, int64_t DeviceId, void *HostPtr, if (!DeviceOrErr) FATAL_MESSAGE(DeviceId, "%s", toString(DeviceOrErr.takeError()).c_str()); + // Create StateInfo for tracking map(from)s for which ref-count is non-zero + // when the entry is encountered. + StateInfoTy StateInfo; + // Move data from device. - int Ret = targetDataEnd(Loc, *DeviceOrErr, ArgNum, ArgBases, Args, ArgSizes, - ArgTypes, ArgNames, ArgMappers, AsyncInfo); + int Ret = + targetDataEnd(Loc, *DeviceOrErr, ArgNum, ArgBases, Args, ArgSizes, + ArgTypes, ArgNames, ArgMappers, AsyncInfo, &StateInfo); if (Ret != OFFLOAD_SUCCESS) { REPORT("Call to targetDataEnd failed, abort target.\n"); return OFFLOAD_FAIL; diff --git a/offload/test/mapping/map_ordering_ptee_tgt_alloc_mapper_alloc_from_to.c b/offload/test/mapping/map_ordering_ptee_tgt_alloc_mapper_alloc_from_to.c new file mode 100644 index 0000000000000..3ad8748cb0572 --- /dev/null +++ b/offload/test/mapping/map_ordering_ptee_tgt_alloc_mapper_alloc_from_to.c @@ -0,0 +1,37 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +// Since the allocation of the pointee happens as on the "target" construct (1), +// the "to" transfer requested as part of the mapper should also happen. +// +// Similarly, the "from" transfer should also happen at the end of the target +// construct, even if the ref-count of the pointee x has not gone down to 0 +// when "from" is encountered. + +// This currently fails, but should start passing once ATTACH-style maps are +// enabled for mappers (#166874). +// XFAIL: * + +#include + +typedef struct { + int *p; + int *q; +} S; +#pragma omp declare mapper(my_mapper : S s) map(alloc : s.p, s.p[0 : 10]) \ + map(from : s.p[0 : 10]) map(to : s.p[0 : 10]) map(alloc : s.p[0 : 10]) + +S s1; +int main() { + int x[10]; + x[1] = 111; + s1.q = s1.p = &x[0]; + +#pragma omp target map(alloc : s1.p[0 : 10]) \ + map(mapper(my_mapper), tofrom : s1) // (1) + { + printf("%d\n", s1.p[1]); // CHECK: 111 + s1.p[1] = s1.p[1] + 111; + } + + printf("%d\n", s1.p[1]); // CHECK: 222 +} diff --git a/offload/test/mapping/map_ordering_ptee_tgt_data_alloc_tgt_mapper_present_delete_from_to.c b/offload/test/mapping/map_ordering_ptee_tgt_data_alloc_tgt_mapper_present_delete_from_to.c new file mode 100644 index 0000000000000..5fb196e31d9c2 --- /dev/null +++ b/offload/test/mapping/map_ordering_ptee_tgt_data_alloc_tgt_mapper_present_delete_from_to.c @@ -0,0 +1,39 @@ +// RUN: %libomptarget-compile-generic -fopenmp-version=60 +// RUN: %libomptarget-run-generic | %fcheck-generic + +// The "present" check should pass on the "target" construct // (2), +// and there should be no "to" transfer, because the pointee "x" is already +// present (because of (1)). +// However, there should be a "from" transfer at the end of (2) because of the +// "delete" on the mapper. + +// This currently fails, but should start passing once ATTACH-style maps are +// enabled for mappers (#166874). +// XFAIL: * + +#include + +typedef struct { + int *p; + int *q; +} S; +#pragma omp declare mapper(my_mapper : S s) map(alloc : s.p) \ + map(alloc, present : s.p[0 : 10]) map(delete : s.q[ : ]) \ + map(from : s.p[0 : 10]) map(to : s.p[0 : 10]) map(alloc : s.p[0 : 10]) + +S s1; +int main() { + int x[10]; + x[1] = 111; + s1.q = s1.p = &x[0]; + +#pragma omp target data map(alloc : x) // (1) + { +#pragma omp target map(mapper(my_mapper), tofrom : s1) // (2) + { + printf("%d\n", s1.p[1]); // CHECK-NOT: 111 + s1.p[1] = 222; + } + printf("%d\n", s1.p[1]); // CHECK: 222 + } +} diff --git a/offload/test/mapping/map_ordering_tgt_alloc_from_to.c b/offload/test/mapping/map_ordering_tgt_alloc_from_to.c new file mode 100644 index 0000000000000..71f68134c7317 --- /dev/null +++ b/offload/test/mapping/map_ordering_tgt_alloc_from_to.c @@ -0,0 +1,25 @@ +// RUN: %libomptarget-compile-generic +// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefix=DEBUG -check-prefix=CHECK +// REQUIRES: libomptarget-debug + +#include + +// Even if the "alloc" and "from" are encountered before the "to", +// there should be a data-transfer from host to device, as the +// ref-count goes from 0 to 1 at the entry of the target region. + +int main() { + int x = 111; + // DEBUG: omptarget --> HstPtrBegin 0x[[#%x,HOST_ADDR:]] was newly allocated + // DEBUG-SAME: for the current region + // DEBUG: omptarget --> Moving {{.*}} bytes + // DEBUG-SAME: (hst:0x{{0*}}[[#HOST_ADDR]]) -> (tgt:0x{{.*}}) +#pragma omp target map(alloc : x) map(from : x) map(to : x) map(alloc : x) + { + printf("%d\n", x); // CHECK: 111 + x = x + 111; + } + + printf("%d\n", x); // CHECK: 222 +} diff --git a/offload/test/mapping/map_ordering_tgt_alloc_present_tofrom.c b/offload/test/mapping/map_ordering_tgt_alloc_present_tofrom.c new file mode 100644 index 0000000000000..f8f397efc4acd --- /dev/null +++ b/offload/test/mapping/map_ordering_tgt_alloc_present_tofrom.c @@ -0,0 +1,27 @@ +// RUN: %libomptarget-compile-generic +// RUN: %libomptarget-run-fail-generic 2>&1 \ +// RUN: | %fcheck-generic + +#include + +int main() { + // CHECK: addr=0x[[#%x,HOST_ADDR:]], size=[[#%u,SIZE:]] + int x = 111; + fprintf(stderr, "addr=%p, size=%ld\n", &x, sizeof(x)); +// CHECK: omptarget message: device mapping required by 'present' map type +// modifier does not exist for host address 0x{{0*}}[[#HOST_ADDR]] ([[#SIZE]] +// bytes) +// CHECK: omptarget error: Pointer 0x{{0*}}[[#HOST_ADDR]] was not present +// on the device upon entry to the region. +// ('present' map type modifier). +// CHECK: omptarget error: Call to targetDataBegin failed, abort target. +// CHECK: omptarget error: Failed to process data before launching the kernel. +// CHECK: omptarget fatal error 1: failure of target construct while offloading +// is mandatory +#pragma omp target map(alloc : x) map(present, alloc : x) map(tofrom : x) + { + printf("%d\n", x); + } + + return 0; +} diff --git a/offload/test/mapping/map_ordering_tgt_alloc_tofrom.c b/offload/test/mapping/map_ordering_tgt_alloc_tofrom.c new file mode 100644 index 0000000000000..c76e2b4bafa1a --- /dev/null +++ b/offload/test/mapping/map_ordering_tgt_alloc_tofrom.c @@ -0,0 +1,14 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +#include + +int main() { + int x = 111; +#pragma omp target map(alloc : x) map(tofrom : x) map(alloc : x) + { + printf("%d\n", x); // CHECK: 111 + x = x + 111; + } + + printf("%d\n", x); // CHECK: 222 +} diff --git a/offload/test/mapping/map_ordering_tgt_data_alloc_from.c b/offload/test/mapping/map_ordering_tgt_data_alloc_from.c new file mode 100644 index 0000000000000..e5905460bea19 --- /dev/null +++ b/offload/test/mapping/map_ordering_tgt_data_alloc_from.c @@ -0,0 +1,14 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +#include + +int main() { + int x = 111; +#pragma omp target data map(alloc : x) map(from : x) map(alloc : x) + { +#pragma omp target map(present, alloc : x) + x = 222; + } + + printf("%d\n", x); // CHECK: 222 +} diff --git a/offload/test/mapping/map_ordering_tgt_data_alloc_to_from.c b/offload/test/mapping/map_ordering_tgt_data_alloc_to_from.c new file mode 100644 index 0000000000000..1ed41200cecde --- /dev/null +++ b/offload/test/mapping/map_ordering_tgt_data_alloc_to_from.c @@ -0,0 +1,17 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +#include + +int main() { + int x = 111; +#pragma omp target data map(alloc : x) map(to : x) map(from : x) map(alloc : x) + { +#pragma omp target map(present, alloc : x) + { + printf("%d\n", x); // CHECK: 111 + x = x + 111; + } + } + + printf("%d\n", x); // CHECK: 222 +} diff --git a/offload/test/mapping/map_ordering_tgt_data_alloc_tofrom.c b/offload/test/mapping/map_ordering_tgt_data_alloc_tofrom.c new file mode 100644 index 0000000000000..6db30d2aa7f9d --- /dev/null +++ b/offload/test/mapping/map_ordering_tgt_data_alloc_tofrom.c @@ -0,0 +1,17 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +#include + +int main() { + int x = 111; +#pragma omp target data map(alloc : x) map(tofrom : x) map(alloc : x) + { +#pragma omp target map(present, alloc : x) + { + printf("%d\n", x); // CHECK: 111 + x = x + 111; + } + } + + printf("%d\n", x); // CHECK: 222 +} diff --git a/offload/test/mapping/map_ordering_tgt_exit_data_always_always.c b/offload/test/mapping/map_ordering_tgt_exit_data_always_always.c new file mode 100644 index 0000000000000..43f3aae26b009 --- /dev/null +++ b/offload/test/mapping/map_ordering_tgt_exit_data_always_always.c @@ -0,0 +1,29 @@ +// RUN: %libomptarget-compile-generic +// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefix=DEBUG -check-prefix=CHECK +// REQUIRES: libomptarget-debug + +// There should only be one "from" data-transfer, despite the two duplicate +// maps. + +#include + +int main() { + int x = 111; +#pragma omp target data map(alloc : x) + { +#pragma omp target enter data map(alloc : x) map(to : x) + { +#pragma omp target map(present, alloc : x) + { + printf("In tgt: %d\n", x); // CHECK-NOT: In tgt: 111 + x = 222; + } + } +#pragma omp target exit data map(always, from : x) map(always, from : x) + // DEBUG: omptarget --> Moving 4 bytes (tgt:0x{{.*}}) -> (hst:0x{{.*}}) + // DEBUG-NOT: omptarget --> Moving 4 bytes + } + + printf("%d\n", x); // CHECK: 222 +} diff --git a/offload/test/mapping/map_ordering_tgt_exit_data_delete_from.c b/offload/test/mapping/map_ordering_tgt_exit_data_delete_from.c new file mode 100644 index 0000000000000..ad7db66890edb --- /dev/null +++ b/offload/test/mapping/map_ordering_tgt_exit_data_delete_from.c @@ -0,0 +1,20 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +#include + +int main() { + int x = 111; +#pragma omp target data map(alloc : x) + { +#pragma omp target enter data map(alloc : x) map(to : x) + { +#pragma omp target map(present, alloc : x) + { + printf("%d\n", x); // CHECK-NOT: 111 + x = 222; + } + } +#pragma omp target exit data map(delete : x) map(from : x) map(delete : x) + printf("%d\n", x); // CHECK: 222 + } +} diff --git a/offload/test/mapping/map_ordering_tgt_exit_data_delete_from_assumedsize.c b/offload/test/mapping/map_ordering_tgt_exit_data_delete_from_assumedsize.c new file mode 100644 index 0000000000000..f8a117fc0ecf7 --- /dev/null +++ b/offload/test/mapping/map_ordering_tgt_exit_data_delete_from_assumedsize.c @@ -0,0 +1,37 @@ +// RUN: %libomptarget-compile-generic -fopenmp-version=60 +// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefix=DEBUG -check-prefix=CHECK +// REQUIRES: libomptarget-debug + +// The from on target_exit_data should result in a data-transfer of 4 bytes, +// even if when "from" is honored, the ref-count hasn't gone down to 0. +// It will eventually go down to 0 as part of the same exit_data due to the +// "delete" on it. +// This is a case that cannot be handled at compile time because the list-items +// are not related. + +#include + +int main() { + int x[10]; + int *p1x, *p2x; + p1x = p2x = &x[0]; + +#pragma omp target data map(alloc : x) + { +#pragma omp target enter data map(alloc : x) map(to : x) + { +#pragma omp target map(present, alloc : x) + { + printf("In tgt: %d\n", x[0]); // CHECK-NOT: In tgt: 111 + x[0] = 222; + } + } +// DEBUG: omptarget --> Found previously skipped FROM transfer +// DEBUG-SAME: for HstPtr=0x[[#%x,HOST_ADDR:]], with size [[#%u,SIZE:]] +// DEBUG: omptarget --> Moving [[#SIZE]] bytes +// DEBUG-SAME: (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDR]]) +#pragma omp target exit data map(delete : p1x[ : ]) map(from : p2x[0]) + printf("%d\n", x[0]); // CHECK: 222 + } +} diff --git a/offload/test/mapping/map_ordering_tgt_exit_data_from_delete_assumedsize.c b/offload/test/mapping/map_ordering_tgt_exit_data_from_delete_assumedsize.c new file mode 100644 index 0000000000000..3f5b08d3473f8 --- /dev/null +++ b/offload/test/mapping/map_ordering_tgt_exit_data_from_delete_assumedsize.c @@ -0,0 +1,38 @@ +// RUN: %libomptarget-compile-generic -fopenmp-version=60 +// RUN: env LIBOMPTARGET_DEBUG=1 %libomptarget-run-generic 2>&1 \ +// RUN: | %fcheck-generic -check-prefix=DEBUG -check-prefix=CHECK +// REQUIRES: libomptarget-debug + +#include + +// The from on target_exit_data should result in a data-transfer of 4 bytes, +// even if when "delete" is honored first, and by the time "from" is +// encountered, the ref-count had already been 0 (i.e. it's not transitioning +// from non-zero to zero). +// This is a case that cannot be handled at compile time because the list-items +// are not related. + +#include +int main() { + int x[10]; + int *p1x, *p2x; + p1x = p2x = &x[0]; + +#pragma omp target data map(alloc : x) + { +#pragma omp target enter data map(alloc : x) map(to : x) + { +#pragma omp target map(present, alloc : x) + { + printf("In tgt: %d\n", x[0]); // CHECK-NOT: In tgt: 111 + x[0] = 222; + } + } + // DEBUG: omptarget --> Pointer HstPtr=0x[[#%x,HOST_ADDR:]] + // DEBUG-SAME: was previously marked for deletion + // DEBUG: omptarget --> Moving {{.*}} bytes + // DEBUG-SAME: (tgt:0x{{.*}}) -> (hst:0x{{0*}}[[#HOST_ADDR]]) +#pragma omp target exit data map(from : p2x[0]) map(delete : p1x[ : ]) + printf("%d\n", x[0]); // CHECK: 222 + } +}