Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 28 additions & 15 deletions offload/include/OpenMP/Mapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -484,28 +484,43 @@ struct AttachMapInfo {
MapType(Type), Pointername(Name) {}
};

/// Structure to track ATTACH entries and new allocations across recursive calls
/// (for handling mappers) to targetDataBegin for a given construct.
struct AttachInfoTy {
/// ATTACH map entries for deferred processing.
/// Structure to track new allocations, ATTACH entries, DELETE entries and
/// skipped FROM data transfer information for a given construct, across
/// recursive calls (for handling mappers) to targetDataBegin/targetDataEnd.
struct StateInfoTy {
/// ATTACH map entries for deferred processing until all other maps are done.
llvm::SmallVector<AttachMapInfo> AttachEntries;

/// Host pointers for which new memory was allocated.
/// Key: host pointer, Value: allocation size.
llvm::DenseMap<void *, int64_t> NewAllocations;

AttachInfoTy() = default;
/// Host pointers that had a FROM entry, but for which a data transfer was
/// skipped due to the ref-count not being zero.
/// Key: host pointer, Value: data size.
llvm::DenseMap<void *, int64_t> SkippedFromEntries;

/// Host pointers for which we have attempted a FROM transfer at some point
/// during targetDataEnd. Used to avoid duplicate transfers.
llvm::SmallSet<void *, 32> TransferredFromPtrs;

/// Host pointers for which a DELETE entry was encountered, causing their
/// ref-count to have gone down to zero.
llvm::SmallSet<void *, 32> MarkedForDeletionPtrs;

StateInfoTy() = default;

// Delete copy constructor and copy assignment operator to prevent copying
AttachInfoTy(const AttachInfoTy &) = delete;
AttachInfoTy &operator=(const AttachInfoTy &) = delete;
StateInfoTy(const StateInfoTy &) = delete;
StateInfoTy &operator=(const StateInfoTy &) = delete;
};

// Function pointer type for targetData* functions (targetDataBegin,
// targetDataEnd and targetDataUpdate).
typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **,
void **, int64_t *, int64_t *,
map_var_info_t *, void **, AsyncInfoTy &,
AttachInfoTy *, bool);
StateInfoTy *, bool);

void dumpTargetPointerMappings(const ident_t *Loc, DeviceTy &Device,
bool toStdOut = false);
Expand All @@ -514,24 +529,22 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
AttachInfoTy *AttachInfo = nullptr,
bool FromMapper = false);
StateInfoTy *StateInfo = nullptr, bool FromMapper = false);

int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgBases, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
AttachInfoTy *AttachInfo = nullptr, bool FromMapper = false);
StateInfoTy *StateInfo = nullptr, bool FromMapper = false);

int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes, map_var_info_t *ArgNames,
void **ArgMappers, AsyncInfoTy &AsyncInfo,
AttachInfoTy *AttachInfo = nullptr,
bool FromMapper = false);
StateInfoTy *StateInfo = nullptr, bool FromMapper = false);

// Process deferred ATTACH map entries collected during targetDataBegin.
int processAttachEntries(DeviceTy &Device, AttachInfoTy &AttachInfo,
int processAttachEntries(DeviceTy &Device, StateInfoTy &StateInfo,
AsyncInfoTy &AsyncInfo);

struct MappingInfoTy {
Expand Down Expand Up @@ -572,7 +585,7 @@ struct MappingInfoTy {
bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr,
bool ReleaseHDTTMap = true);
bool ReleaseHDTTMap = true, StateInfoTy *StateInfo = nullptr);

/// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor
/// ensures exclusive access to the HDTT map.
Expand Down
27 changes: 23 additions & 4 deletions offload/libomptarget/OpenMP/Mapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,8 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer(
int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo,
bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) {
AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap,
StateInfoTy *StateInfo) {

LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size, OwnedTPR);
LR.TPR.Flags.IsPresent = true;
Expand Down Expand Up @@ -322,10 +323,28 @@ TargetPointerResultTy MappingInfoTy::getTargetPointer(
if (ReleaseHDTTMap)
HDTTMap.destroy();

// If the target pointer is valid, and we need to transfer data, issue the
// data transfer.
// Lambda to check if this pointer was newly allocated on the current region.
// This is needed to handle cases when the TO entry is encounter after an
// alloc entry for the same pointer, which increased the ref-count from 0 to
// 1, has already been encountered before. But because the ref-count was
// already 1 when TO was encountered, it wouldn't incur a transfer. e.g.
// ... map(alloc: x) map(to: x).
auto WasNewlyAllocatedForCurrentRegion = [&]() {
if (!StateInfo)
return false;
bool IsNewlyAllocated = StateInfo->NewAllocations.contains(HstPtrBegin);
if (IsNewlyAllocated)
DP("HstPtrBegin " DPxMOD " was newly allocated for the current region\n",
DPxPTR(HstPtrBegin));
return IsNewlyAllocated;
};

// Even if this isn't a new entry, we still need to do a data-transfer if
// the pointer was newly allocated on the current target region.
if (LR.TPR.TargetPointer && !LR.TPR.Flags.IsHostPointer && HasFlagTo &&
(LR.TPR.Flags.IsNewEntry || HasFlagAlways) && Size != 0) {
(LR.TPR.Flags.IsNewEntry || HasFlagAlways ||
WasNewlyAllocatedForCurrentRegion()) &&
Size != 0) {

// If we have something like:
// #pragma omp target map(to: s.myarr[0:10]) map(to: s.myarr[0:10])
Expand Down
17 changes: 10 additions & 7 deletions offload/libomptarget/interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,19 +167,22 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,

int Rc = OFFLOAD_SUCCESS;

// Only allocate AttachInfo for targetDataBegin
std::unique_ptr<AttachInfoTy> AttachInfo;
if (TargetDataFunction == targetDataBegin)
AttachInfo = std::make_unique<AttachInfoTy>();
// Allocate StateInfo for targetDataBegin and targetDataEnd to track
// allocations, pointer attachments and deferred transfers.
// This is not needed for targetDataUpdate.
std::unique_ptr<StateInfoTy> StateInfo;
if (TargetDataFunction == targetDataBegin ||
TargetDataFunction == targetDataEnd)
StateInfo = std::make_unique<StateInfoTy>();

Rc = TargetDataFunction(Loc, *DeviceOrErr, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, AsyncInfo,
AttachInfo.get(), /*FromMapper=*/false);
StateInfo.get(), /*FromMapper=*/false);

if (Rc == OFFLOAD_SUCCESS) {
// Process deferred ATTACH entries BEFORE synchronization
if (AttachInfo && !AttachInfo->AttachEntries.empty())
Rc = processAttachEntries(*DeviceOrErr, *AttachInfo, AsyncInfo);
if (StateInfo && !StateInfo->AttachEntries.empty())
Rc = processAttachEntries(*DeviceOrErr, *StateInfo, AsyncInfo);

if (Rc == OFFLOAD_SUCCESS)
Rc = AsyncInfo.synchronize();
Expand Down
Loading
Loading