Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@
#include "mlir/Dialect/OpenMP/OpenMPInterfaces.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"

#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include <algorithm>

namespace flangomp {
#define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS
Expand Down Expand Up @@ -154,6 +156,104 @@ class AutomapToTargetDataPass
addMapInfo(globalOp, loadOp);
}
}

// Move automapped descriptors from map() to has_device_addr in target ops.
auto originatesFromAutomapGlobal = [&](mlir::Value varPtr) -> bool {
if (auto decl = mlir::dyn_cast_or_null<hlfir::DeclareOp>(
varPtr.getDefiningOp())) {
if (auto addrOp = mlir::dyn_cast_or_null<fir::AddrOfOp>(
decl.getMemref().getDefiningOp())) {
if (auto g =
mlir::SymbolTable::lookupNearestSymbolFrom<fir::GlobalOp>(
decl, addrOp.getSymbol()))
return automapGlobals.contains(g);
}
}
return false;
};

module.walk([&](mlir::omp::TargetOp target) {
// Collect candidates to move: descriptor maps of automapped globals.
llvm::SmallVector<mlir::Value> newMapOps;
llvm::SmallVector<unsigned> removedIndices;
llvm::SmallVector<mlir::Value> movedToHDA;
llvm::SmallVector<mlir::BlockArgument> oldMapArgsForMoved;

auto mapRange = target.getMapVars();
newMapOps.reserve(mapRange.size());

auto argIface = llvm::dyn_cast<mlir::omp::BlockArgOpenMPOpInterface>(
target.getOperation());
llvm::ArrayRef<mlir::BlockArgument> mapBlockArgs =
argIface.getMapBlockArgs();

for (auto [idx, mapVal] : llvm::enumerate(mapRange)) {
auto mapOp =
mlir::dyn_cast<mlir::omp::MapInfoOp>(mapVal.getDefiningOp());
if (!mapOp) {
newMapOps.push_back(mapVal);
continue;
}

mlir::Type varTy = fir::unwrapRefType(mapOp.getVarType());
bool isDescriptor = mlir::isa<fir::BaseBoxType>(varTy);
if (isDescriptor && originatesFromAutomapGlobal(mapOp.getVarPtr())) {
movedToHDA.push_back(mapVal);
removedIndices.push_back(idx);
oldMapArgsForMoved.push_back(mapBlockArgs[idx]);
} else {
newMapOps.push_back(mapVal);
}
}

if (movedToHDA.empty())
return;

// Update map vars to exclude moved entries.
mlir::MutableOperandRange mapMutable = target.getMapVarsMutable();
mapMutable.assign(newMapOps);

// Append moved entries to has_device_addr and insert corresponding block
// arguments.
mlir::MutableOperandRange hdaMutable =
target.getHasDeviceAddrVarsMutable();
llvm::SmallVector<mlir::Value> newHDA;
newHDA.reserve(hdaMutable.size() + movedToHDA.size());
llvm::for_each(hdaMutable.getAsOperandRange(),
[&](mlir::Value v) { newHDA.push_back(v); });

unsigned hdaStart = argIface.getHasDeviceAddrBlockArgsStart();
unsigned oldHdaCount = argIface.numHasDeviceAddrBlockArgs();
llvm::SmallVector<mlir::BlockArgument> newHDAArgsForMoved;
unsigned insertIndex = hdaStart + oldHdaCount;
for (mlir::Value v : movedToHDA) {
newHDA.push_back(v);
target->getRegion(0).insertArgument(insertIndex, v.getType(),
v.getLoc());
// Capture the newly inserted block argument.
newHDAArgsForMoved.push_back(
target->getRegion(0).getArgument(insertIndex));
insertIndex++;
}
hdaMutable.assign(newHDA);

// Redirect uses in the region: replace old map block args with the
// corresponding new has_device_addr block args.
for (auto [oldArg, newArg] :
llvm::zip_equal(oldMapArgsForMoved, newHDAArgsForMoved))
oldArg.replaceAllUsesWith(newArg);

// Finally, erase corresponding map block arguments (descending order).
unsigned mapStart = argIface.getMapBlockArgsStart();
// Convert indices to absolute argument numbers before erasing.
llvm::SmallVector<unsigned> absArgNos;
absArgNos.reserve(removedIndices.size());
for (unsigned idx : removedIndices)
absArgNos.push_back(mapStart + idx);
std::sort(absArgNos.begin(), absArgNos.end(), std::greater<>());
for (unsigned absNo : absArgNos)
target->getRegion(0).eraseArgument(absNo);
});
}
};
} // namespace
32 changes: 32 additions & 0 deletions offload/libomptarget/omptarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Object/ObjectFile.h"

#include "flang/ISO_Fortran_binding.h"

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>
Expand Down Expand Up @@ -378,6 +381,34 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin,
return TgtPteeBase;
}

// Fortran pointer attachments treated descriptors as plain pointers, so
// automapped arrays lose their declared bounds on the device. Recognize
// CFI descriptors to compute their actual size before copying, ensuring the
// full descriptor (including bounds) is transferred during attachment.
static int64_t getFortranDescriptorSize(void **HstPtrAddr,
int64_t ReportedSize) {
constexpr int64_t VoidPtrSize = sizeof(void *);

if (!HstPtrAddr || ReportedSize > VoidPtrSize)
return ReportedSize;

const CFI_cdesc_t *Desc = reinterpret_cast<const CFI_cdesc_t *>(HstPtrAddr);

if (Desc->version != CFI_VERSION)
return ReportedSize;

if (Desc->rank > CFI_MAX_RANK)
return ReportedSize;

const char *RawDesc = reinterpret_cast<const char *>(Desc);
const char *DimsAddr = reinterpret_cast<const char *>(&Desc->dim);
size_t HeaderBytes = static_cast<size_t>(DimsAddr - RawDesc);
size_t DimsBytes = static_cast<size_t>(Desc->rank) * sizeof(CFI_dim_t);
size_t TotalBytes = HeaderBytes + DimsBytes;

return std::max<int64_t>(ReportedSize, static_cast<int64_t>(TotalBytes));
}

/// Utility function to perform a pointer attachment operation.
///
/// For something like:
Expand Down Expand Up @@ -445,6 +476,7 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
"Need a valid pointer entry to perform pointer-attachment");

constexpr int64_t VoidPtrSize = sizeof(void *);
HstPtrSize = getFortranDescriptorSize(HstPtrAddr, HstPtrSize);
assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small");

void *TgtPteeBase =
Expand Down
3 changes: 0 additions & 3 deletions offload/test/offloading/fortran/declare-target-automap.f90
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
!Offloading test for AUTOMAP modifier in declare target enter
! REQUIRES: flang, amdgpu

! FIXME: https://github.com/llvm/llvm-project/issues/161265
! XFAIL: amdgpu

! RUN: %libomptarget-compile-fortran-run-and-check-generic
program automap_program
use iso_c_binding, only: c_loc
Expand Down
Loading