diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp index 8b9991301aae8..53f8e801bbbe1 100644 --- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp +++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp @@ -18,9 +18,11 @@ #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Operation.h" +#include "mlir/IR/SymbolTable.h" #include "mlir/Pass/Pass.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include namespace flangomp { #define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS @@ -154,6 +156,107 @@ class AutomapToTargetDataPass addMapInfo(globalOp, loadOp); } } + + // Move automapped descriptors from map() to has_device_addr in target ops. + auto originatesFromAutomapGlobal = [&](mlir::Value varPtr) -> bool { + if (auto decl = mlir::dyn_cast_or_null( + varPtr.getDefiningOp())) { + if (auto addrOp = mlir::dyn_cast_or_null( + decl.getMemref().getDefiningOp())) { + if (auto g = + mlir::SymbolTable::lookupNearestSymbolFrom( + decl, addrOp.getSymbol())) + return automapGlobals.contains(g); + } + } + return false; + }; + + module.walk([&](mlir::omp::TargetOp target) { + // Collect candidates to move: descriptor maps of automapped globals. + llvm::SmallVector newMapOps; + llvm::SmallVector removedIndices; + llvm::SmallVector movedToHDA; + llvm::SmallVector oldMapArgsForMoved; + + auto mapRange = target.getMapVars(); + newMapOps.reserve(mapRange.size()); + + auto argIface = llvm::dyn_cast( + target.getOperation()); + llvm::ArrayRef mapBlockArgs = + argIface.getMapBlockArgs(); + + for (auto [idx, mapVal] : llvm::enumerate(mapRange)) { + auto mapOp = + mlir::dyn_cast(mapVal.getDefiningOp()); + if (!mapOp) { + newMapOps.push_back(mapVal); + continue; + } + + mlir::Type varTy = fir::unwrapRefType(mapOp.getVarType()); + bool isDescriptor = mlir::isa(varTy); + if (isDescriptor && originatesFromAutomapGlobal(mapOp.getVarPtr())) { + movedToHDA.push_back(mapVal); + removedIndices.push_back(idx); + oldMapArgsForMoved.push_back(mapBlockArgs[idx]); + } else { + newMapOps.push_back(mapVal); + } + } + + if (movedToHDA.empty()) + return; + + // Update map vars to exclude moved entries. + mlir::MutableOperandRange mapMutable = target.getMapVarsMutable(); + mapMutable.assign(newMapOps); + + // Append moved entries to has_device_addr and insert corresponding block + // arguments. + mlir::MutableOperandRange hdaMutable = + target.getHasDeviceAddrVarsMutable(); + llvm::SmallVector newHDA; + newHDA.reserve(hdaMutable.size() + movedToHDA.size()); + llvm::for_each(hdaMutable.getAsOperandRange(), + [&](mlir::Value v) { newHDA.push_back(v); }); + + unsigned hdaStart = argIface.getHasDeviceAddrBlockArgsStart(); + unsigned oldHdaCount = argIface.numHasDeviceAddrBlockArgs(); + llvm::SmallVector newHDAArgsForMoved; + unsigned insertIndex = hdaStart + oldHdaCount; + for (mlir::Value v : movedToHDA) { + newHDA.push_back(v); + target->getRegion(0).insertArgument(insertIndex, v.getType(), + v.getLoc()); + // Capture the newly inserted block argument. + newHDAArgsForMoved.push_back( + target->getRegion(0).getArgument(insertIndex)); + insertIndex++; + } + hdaMutable.assign(newHDA); + + // Redirect uses in the region: replace old map block args with the + // corresponding new has_device_addr block args. + for (auto [oldArg, newArg] : + llvm::zip_equal(oldMapArgsForMoved, newHDAArgsForMoved)) + oldArg.replaceAllUsesWith(newArg); + + // Finally, erase corresponding map block arguments in descending order. + // Descending order is necessary to avoid index invalidation: erasing + // arguments from highest to lowest index ensures that earlier erases do + // not shift the indices of arguments yet to be erased. + unsigned mapStart = argIface.getMapBlockArgsStart(); + // Convert indices to absolute argument numbers before erasing. + llvm::SmallVector absArgNos; + absArgNos.reserve(removedIndices.size()); + for (unsigned idx : removedIndices) + absArgNos.push_back(mapStart + idx); + std::sort(absArgNos.begin(), absArgNos.end(), std::greater<>()); + for (unsigned absNo : absArgNos) + target->getRegion(0).eraseArgument(absNo); + }); } }; } // namespace diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index a1950cbb62908..ec148926e53e5 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -33,6 +33,9 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Object/ObjectFile.h" +#include "flang/ISO_Fortran_binding.h" + +#include #include #include #include @@ -378,6 +381,34 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin, return TgtPteeBase; } +// Fortran pointer attachments treat descriptors as plain pointers, so +// automapped arrays lose their declared bounds on the device. Recognize +// CFI descriptors to compute their actual size before copying, ensuring the +// full descriptor (including bounds) is transferred during attachment. +static int64_t getFortranDescriptorSize(void **HstPtrAddr, + int64_t ReportedSize) { + constexpr int64_t VoidPtrSize = sizeof(void *); + + if (!HstPtrAddr || ReportedSize > VoidPtrSize) + return ReportedSize; + + const CFI_cdesc_t *Desc = reinterpret_cast(HstPtrAddr); + + if (Desc->version != CFI_VERSION) + return ReportedSize; + + if (Desc->rank > CFI_MAX_RANK) + return ReportedSize; + + const char *RawDesc = reinterpret_cast(Desc); + const char *DimsAddr = reinterpret_cast(&Desc->dim); + size_t HeaderBytes = static_cast(DimsAddr - RawDesc); + size_t DimsBytes = static_cast(Desc->rank) * sizeof(CFI_dim_t); + size_t TotalBytes = HeaderBytes + DimsBytes; + + return std::max(ReportedSize, static_cast(TotalBytes)); +} + /// Utility function to perform a pointer attachment operation. /// /// For something like: @@ -445,6 +476,7 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo, "Need a valid pointer entry to perform pointer-attachment"); constexpr int64_t VoidPtrSize = sizeof(void *); + HstPtrSize = getFortranDescriptorSize(HstPtrAddr, HstPtrSize); assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small"); void *TgtPteeBase = diff --git a/offload/test/offloading/fortran/declare-target-automap.f90 b/offload/test/offloading/fortran/declare-target-automap.f90 index b44c0b2815274..b9c2d34c834fa 100644 --- a/offload/test/offloading/fortran/declare-target-automap.f90 +++ b/offload/test/offloading/fortran/declare-target-automap.f90 @@ -1,9 +1,6 @@ !Offloading test for AUTOMAP modifier in declare target enter ! REQUIRES: flang, amdgpu -! FIXME: https://github.com/llvm/llvm-project/issues/161265 -! XFAIL: amdgpu - ! RUN: %libomptarget-compile-fortran-run-and-check-generic program automap_program use iso_c_binding, only: c_loc