From 65f4c221a27f638b18268975ba7e10675820ead4 Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Wed, 8 Oct 2025 16:51:51 +0100 Subject: [PATCH 1/2] [Flang][OpenMP] Fix Fortran automap handling - Move automapped Fortran descriptor mappings from map() to has_device_addr in target regions, updating block args and uses accordingly. - In libomptarget, detect CFI descriptors during pointer attachment and compute the correct descriptor size to transfer full metadata (including bounds). - Resolves lost bounds for automapped Fortran arrays on device; no change for C/C++. This fixes test offload/test/offloading/fortran/declare-target-automap.f90 reported broken in #161265. --- .../Optimizer/OpenMP/AutomapToTargetData.cpp | 100 ++++++++++++++++++ offload/libomptarget/omptarget.cpp | 32 ++++++ .../fortran/declare-target-automap.f90 | 3 - 3 files changed, 132 insertions(+), 3 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp index 8b9991301aae8..2edbef151ea4a 100644 --- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp +++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp @@ -18,9 +18,11 @@ #include "mlir/Dialect/OpenMP/OpenMPInterfaces.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Operation.h" +#include "mlir/IR/SymbolTable.h" #include "mlir/Pass/Pass.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include namespace flangomp { #define GEN_PASS_DEF_AUTOMAPTOTARGETDATAPASS @@ -154,6 +156,104 @@ class AutomapToTargetDataPass addMapInfo(globalOp, loadOp); } } + + // Move automapped descriptors from map() to has_device_addr in target ops. + auto originatesFromAutomapGlobal = [&](mlir::Value varPtr) -> bool { + if (auto decl = mlir::dyn_cast_or_null( + varPtr.getDefiningOp())) { + if (auto addrOp = mlir::dyn_cast_or_null( + decl.getMemref().getDefiningOp())) { + if (auto g = + mlir::SymbolTable::lookupNearestSymbolFrom( + decl, addrOp.getSymbol())) + return automapGlobals.contains(g); + } + } + return false; + }; + + module.walk([&](mlir::omp::TargetOp target) { + // Collect candidates to move: descriptor maps of automapped globals. + llvm::SmallVector newMapOps; + llvm::SmallVector removedIndices; + llvm::SmallVector movedToHDA; + llvm::SmallVector oldMapArgsForMoved; + + auto mapRange = target.getMapVars(); + newMapOps.reserve(mapRange.size()); + + auto argIface = llvm::dyn_cast( + target.getOperation()); + llvm::ArrayRef mapBlockArgs = + argIface.getMapBlockArgs(); + + for (auto [idx, mapVal] : llvm::enumerate(mapRange)) { + auto mapOp = + mlir::dyn_cast(mapVal.getDefiningOp()); + if (!mapOp) { + newMapOps.push_back(mapVal); + continue; + } + + mlir::Type varTy = fir::unwrapRefType(mapOp.getVarType()); + bool isDescriptor = mlir::isa(varTy); + if (isDescriptor && originatesFromAutomapGlobal(mapOp.getVarPtr())) { + movedToHDA.push_back(mapVal); + removedIndices.push_back(idx); + oldMapArgsForMoved.push_back(mapBlockArgs[idx]); + } else { + newMapOps.push_back(mapVal); + } + } + + if (movedToHDA.empty()) + return; + + // Update map vars to exclude moved entries. + mlir::MutableOperandRange mapMutable = target.getMapVarsMutable(); + mapMutable.assign(newMapOps); + + // Append moved entries to has_device_addr and insert corresponding block + // arguments. + mlir::MutableOperandRange hdaMutable = + target.getHasDeviceAddrVarsMutable(); + llvm::SmallVector newHDA; + newHDA.reserve(hdaMutable.size() + movedToHDA.size()); + llvm::for_each(hdaMutable.getAsOperandRange(), + [&](mlir::Value v) { newHDA.push_back(v); }); + + unsigned hdaStart = argIface.getHasDeviceAddrBlockArgsStart(); + unsigned oldHdaCount = argIface.numHasDeviceAddrBlockArgs(); + llvm::SmallVector newHDAArgsForMoved; + unsigned insertIndex = hdaStart + oldHdaCount; + for (mlir::Value v : movedToHDA) { + newHDA.push_back(v); + target->getRegion(0).insertArgument(insertIndex, v.getType(), + v.getLoc()); + // Capture the newly inserted block argument. + newHDAArgsForMoved.push_back( + target->getRegion(0).getArgument(insertIndex)); + insertIndex++; + } + hdaMutable.assign(newHDA); + + // Redirect uses in the region: replace old map block args with the + // corresponding new has_device_addr block args. + for (auto [oldArg, newArg] : + llvm::zip_equal(oldMapArgsForMoved, newHDAArgsForMoved)) + oldArg.replaceAllUsesWith(newArg); + + // Finally, erase corresponding map block arguments (descending order). + unsigned mapStart = argIface.getMapBlockArgsStart(); + // Convert indices to absolute argument numbers before erasing. + llvm::SmallVector absArgNos; + absArgNos.reserve(removedIndices.size()); + for (unsigned idx : removedIndices) + absArgNos.push_back(mapStart + idx); + std::sort(absArgNos.begin(), absArgNos.end(), std::greater<>()); + for (unsigned absNo : absArgNos) + target->getRegion(0).eraseArgument(absNo); + }); } }; } // namespace diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index a1950cbb62908..02c83b4a51078 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -33,6 +33,9 @@ #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Object/ObjectFile.h" +#include "flang/ISO_Fortran_binding.h" + +#include #include #include #include @@ -378,6 +381,34 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin, return TgtPteeBase; } +// Fortran pointer attachments treated descriptors as plain pointers, so +// automapped arrays lose their declared bounds on the device. Recognize +// CFI descriptors to compute their actual size before copying, ensuring the +// full descriptor (including bounds) is transferred during attachment. +static int64_t getFortranDescriptorSize(void **HstPtrAddr, + int64_t ReportedSize) { + constexpr int64_t VoidPtrSize = sizeof(void *); + + if (!HstPtrAddr || ReportedSize > VoidPtrSize) + return ReportedSize; + + const CFI_cdesc_t *Desc = reinterpret_cast(HstPtrAddr); + + if (Desc->version != CFI_VERSION) + return ReportedSize; + + if (Desc->rank > CFI_MAX_RANK) + return ReportedSize; + + const char *RawDesc = reinterpret_cast(Desc); + const char *DimsAddr = reinterpret_cast(&Desc->dim); + size_t HeaderBytes = static_cast(DimsAddr - RawDesc); + size_t DimsBytes = static_cast(Desc->rank) * sizeof(CFI_dim_t); + size_t TotalBytes = HeaderBytes + DimsBytes; + + return std::max(ReportedSize, static_cast(TotalBytes)); +} + /// Utility function to perform a pointer attachment operation. /// /// For something like: @@ -445,6 +476,7 @@ static int performPointerAttachment(DeviceTy &Device, AsyncInfoTy &AsyncInfo, "Need a valid pointer entry to perform pointer-attachment"); constexpr int64_t VoidPtrSize = sizeof(void *); + HstPtrSize = getFortranDescriptorSize(HstPtrAddr, HstPtrSize); assert(HstPtrSize >= VoidPtrSize && "PointerSize is too small"); void *TgtPteeBase = diff --git a/offload/test/offloading/fortran/declare-target-automap.f90 b/offload/test/offloading/fortran/declare-target-automap.f90 index b44c0b2815274..b9c2d34c834fa 100644 --- a/offload/test/offloading/fortran/declare-target-automap.f90 +++ b/offload/test/offloading/fortran/declare-target-automap.f90 @@ -1,9 +1,6 @@ !Offloading test for AUTOMAP modifier in declare target enter ! REQUIRES: flang, amdgpu -! FIXME: https://github.com/llvm/llvm-project/issues/161265 -! XFAIL: amdgpu - ! RUN: %libomptarget-compile-fortran-run-and-check-generic program automap_program use iso_c_binding, only: c_loc From ebccde548d8968f7f00e73255fcc0e9e982d126b Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Thu, 9 Oct 2025 16:30:31 +0100 Subject: [PATCH 2/2] Address copilot suggestions. --- flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp | 5 ++++- offload/libomptarget/omptarget.cpp | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp index 2edbef151ea4a..53f8e801bbbe1 100644 --- a/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp +++ b/flang/lib/Optimizer/OpenMP/AutomapToTargetData.cpp @@ -243,7 +243,10 @@ class AutomapToTargetDataPass llvm::zip_equal(oldMapArgsForMoved, newHDAArgsForMoved)) oldArg.replaceAllUsesWith(newArg); - // Finally, erase corresponding map block arguments (descending order). + // Finally, erase corresponding map block arguments in descending order. + // Descending order is necessary to avoid index invalidation: erasing + // arguments from highest to lowest index ensures that earlier erases do + // not shift the indices of arguments yet to be erased. unsigned mapStart = argIface.getMapBlockArgsStart(); // Convert indices to absolute argument numbers before erasing. llvm::SmallVector absArgNos; diff --git a/offload/libomptarget/omptarget.cpp b/offload/libomptarget/omptarget.cpp index 02c83b4a51078..ec148926e53e5 100644 --- a/offload/libomptarget/omptarget.cpp +++ b/offload/libomptarget/omptarget.cpp @@ -381,7 +381,7 @@ static void *calculateTargetPointeeBase(void *HstPteeBase, void *HstPteeBegin, return TgtPteeBase; } -// Fortran pointer attachments treated descriptors as plain pointers, so +// Fortran pointer attachments treat descriptors as plain pointers, so // automapped arrays lose their declared bounds on the device. Recognize // CFI descriptors to compute their actual size before copying, ensuring the // full descriptor (including bounds) is transferred during attachment.