Skip to content
7 changes: 7 additions & 0 deletions offload/include/OpenMP/Mapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,13 @@ struct LookupResult {
LookupResult() : Flags({0, 0, 0}), TPR() {}

TargetPointerResultTy TPR;

bool isEmpty() const {
bool IsEmpty = Flags.IsContained == 0
& Flags.ExtendsBefore == 0
& Flags.ExtendsAfter == 0;
return IsEmpty;
}
};

// This structure stores information of a mapped memory region.
Expand Down
2 changes: 2 additions & 0 deletions offload/include/Shared/Debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ enum OpenMPInfoType : uint32_t {
OMP_INFOTYPE_DATA_TRANSFER = 0x0020,
// Print whenever data does not have a viable device counterpart.
OMP_INFOTYPE_EMPTY_MAPPING = 0x0040,
// Print whenever data does not need to be transferred
OMP_INFOTYPE_REDUNDANT_TRANSFER = 0x0080,
// Enable every flag.
OMP_INFOTYPE_ALL = 0xffffffff,
};
Expand Down
74 changes: 72 additions & 2 deletions offload/libomptarget/omptarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1197,6 +1197,71 @@ class PrivateArgumentManagerTy {
}
};

/// Try to find redundant mappings associated with a kernel launch,
/// and provide a masked version of the kernel argument types that
/// avoid redundant to data transfers between the host and device.
static std::unique_ptr<int64_t[]> maskRedundantTransfers(DeviceTy &Device, int32_t ArgNum,
int64_t *ArgTypes, int64_t *ArgSizes,
map_var_info_t *ArgNames, void **ArgPtrs,
void **ArgMappers) {
std::unique_ptr<int64_t[]> ArgTypesOverride = std::make_unique<int64_t[]>(ArgNum);

MappingInfoTy &MappingInfo = Device.getMappingInfo();
MappingInfoTy::HDTTMapAccessorTy HDTTMap = MappingInfo
.HostDataToTargetMap.getExclusiveAccessor();

int64_t UnusedArgs = 0;

for (int32_t I = 0; I < ArgNum; ++I) {
tgt_map_type ArgType = (tgt_map_type) ArgTypes[I];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clang format the commit. (sth like git clang-format HEAD~ if the script is in your path)


// Check for unused implicit mappings
bool IsArgUnused = ArgType == OMP_TGT_MAPTYPE_NONE;

// Check for unused `map(buf[0:size])` mappings
IsArgUnused |= ArgType == OMP_TGT_MAPTYPE_FROM
|| ArgType == OMP_TGT_MAPTYPE_TO
|| ArgType == (OMP_TGT_MAPTYPE_FROM | OMP_TGT_MAPTYPE_TO);

// Check for unused `map(wrapper.buf[0:size])` mappings
IsArgUnused |= UnusedArgs == ArgNum - 1 && ArgType & OMP_TGT_MAPTYPE_MEMBER_OF
&& ((ArgType & ~OMP_TGT_MAPTYPE_MEMBER_OF) == OMP_TGT_MAPTYPE_PTR_AND_OBJ
|| (ArgType & ~OMP_TGT_MAPTYPE_MEMBER_OF) == (OMP_TGT_MAPTYPE_PTR_AND_OBJ | OMP_TGT_MAPTYPE_TO)
|| (ArgType & ~OMP_TGT_MAPTYPE_MEMBER_OF) == (OMP_TGT_MAPTYPE_PTR_AND_OBJ | OMP_TGT_MAPTYPE_FROM | OMP_TGT_MAPTYPE_TO));

bool IsExistingMapping = !MappingInfo.lookupMapping(HDTTMap, ArgPtrs[I], ArgSizes[I]).isEmpty();

bool IsCustomMapped = ArgMappers && ArgMappers[I];

if (IsExistingMapping | IsCustomMapped | !IsArgUnused) {
ArgTypesOverride[I] = ArgTypes[I];
continue;
}

const std::string Name = ArgNames && ArgNames[I] ?
getNameFromMapping(ArgNames[I]) : std::string("unknown");

bool IsArgFrom = ArgType & OMP_TGT_MAPTYPE_FROM;
bool IsArgTo = ArgType & OMP_TGT_MAPTYPE_TO;

const char *Type = IsArgFrom && IsArgTo ? "tofrom"
: IsArgFrom ? "from"
: IsArgTo ? "to"
: "unknown";

// Optimisation:
// A new mapping is not used by the kernel.
// Change the type such that no data is transferred to and/or from the device.
INFO(OMP_INFOTYPE_REDUNDANT_TRANSFER, Device.DeviceID, "%s(%s)[%" PRId64 "] %s\n", Type,
Name.c_str(), ArgSizes[I], "is not used and will not be copied");

ArgTypesOverride[I] = ArgTypes[I] & ~(OMP_TGT_MAPTYPE_TO | OMP_TGT_MAPTYPE_FROM);
UnusedArgs++;
}

return ArgTypesOverride;
}

/// Process data before launching the kernel, including calling targetDataBegin
/// to map and transfer data to target device, transferring (first-)private
/// variables.
Expand Down Expand Up @@ -1417,11 +1482,16 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,

int NumClangLaunchArgs = KernelArgs.NumArgs;
int Ret = OFFLOAD_SUCCESS;

std::unique_ptr<int64_t[]> ArgTypesOverride =
maskRedundantTransfers(Device, NumClangLaunchArgs, KernelArgs.ArgTypes,
KernelArgs.ArgSizes, KernelArgs.ArgNames, KernelArgs.ArgPtrs, KernelArgs.ArgMappers);

if (NumClangLaunchArgs) {
// Process data, such as data mapping, before launching the kernel
Ret = processDataBefore(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
KernelArgs.ArgSizes, KernelArgs.ArgTypes,
KernelArgs.ArgSizes, ArgTypesOverride.get(),
KernelArgs.ArgNames, KernelArgs.ArgMappers, TgtArgs,
TgtOffsets, PrivateArgumentManager, AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
Expand Down Expand Up @@ -1473,7 +1543,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
// variables
Ret = processDataAfter(Loc, DeviceId, HostPtr, NumClangLaunchArgs,
KernelArgs.ArgBasePtrs, KernelArgs.ArgPtrs,
KernelArgs.ArgSizes, KernelArgs.ArgTypes,
KernelArgs.ArgSizes, ArgTypesOverride.get(),
KernelArgs.ArgNames, KernelArgs.ArgMappers,
PrivateArgumentManager, AsyncInfo);
if (Ret != OFFLOAD_SUCCESS) {
Expand Down
72 changes: 72 additions & 0 deletions offload/test/mapping/skip_transfers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// clang-format off
// RUN: %libomptarget-compilexx-generic
// RUN: env LIBOMPTARGET_INFO=128 %libomptarget-run-generic 2>&1 \
// RUN: | %fcheck-generic

// REQUIRES: gpu
// clang-format on

int main() {
float DataStack = 0;

// CHECK-NOT: omptarget device 0 info: from(unknown)[4] is not used and will not be copied
#pragma omp target map(from: DataStack)
{
DataStack = 1;
}

// CHECK-NOT: omptarget device 0 info: to(unknown)[4] is not used and will not be copied
#pragma omp target map(always to: DataStack)
;

// CHECK: omptarget device 0 info: tofrom(unknown)[4] is not used and will not be copied
#pragma omp target map(tofrom: DataStack)
;

int Size = 16;
double *Data = new double[Size];

// CHECK-NOT: omptarget device 0 info: tofrom(unknown)[8] is not used and will not be copied
#pragma omp target map(tofrom: Data[0:1])
{
Data[0] = 1;
}

// CHECK-NOT: omptarget device 0 info: tofrom(unknown)[16] is not used and will not be copied
#pragma omp target map(always tofrom: Data[0:2])
;

// CHECK: omptarget device 0 info: from(unknown)[24] is not used and will not be copied
#pragma omp target map(from: Data[0:3])
;

// CHECK: omptarget device 0 info: to(unknown)[24] is not used and will not be copied
#pragma omp target map(to: Data[0:3])
;

// CHECK: omptarget device 0 info: tofrom(unknown)[32] is not used and will not be copied
#pragma omp target map(tofrom: Data[0:4])
;

// CHECK-NOT: omptarget device 0 info: to(unknown)[40] is not used and will not be copied
#pragma omp target map(to: Data[0:5])
{
#pragma omp teams
Data[0] = 1;
}

struct {
double *Data;
} Wrapper { .Data = Data };

// CHECK-NOT: omptarget device 0 info: tofrom(unknown)[48] is not used and will not be copied
#pragma omp target map(tofrom: Wrapper.Data[0:6])
{
Wrapper.Data[0] = 1;
}

// CHECK: omptarget device 0 info: unknown(unknown)[8] is not used and will not be copied
// CHECK: omptarget device 0 info: tofrom(unknown)[56] is not used and will not be copied
#pragma omp target map(tofrom: Wrapper.Data[0:7])
;
}