diff --git a/flang/include/flang/Lower/Cuda.h b/flang/include/flang/Lower/CUDA.h similarity index 61% rename from flang/include/flang/Lower/Cuda.h rename to flang/include/flang/Lower/CUDA.h index b6f849e3d63f0..6c2e6d71a123e 100644 --- a/flang/include/flang/Lower/Cuda.h +++ b/flang/include/flang/Lower/CUDA.h @@ -1,4 +1,4 @@ -//===-- Lower/Cuda.h -- Cuda Fortran utilities ------------------*- C++ -*-===// +//===-- Lower/CUDA.h -- CUDA Fortran utilities ------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -14,13 +14,23 @@ #define FORTRAN_LOWER_CUDA_H #include "flang/Optimizer/Builder/FIRBuilder.h" +#include "flang/Optimizer/Builder/MutableBox.h" #include "flang/Optimizer/Dialect/CUF/CUFOps.h" +#include "flang/Runtime/allocator-registry-consts.h" #include "flang/Semantics/tools.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/OpenACC/OpenACC.h" +namespace mlir { +class Value; +class Location; +class MLIRContext; +} // namespace mlir + namespace Fortran::lower { +class AbstractConverter; + static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) { std::optional cudaAttr = Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate()); @@ -37,6 +47,21 @@ static inline unsigned getAllocatorIdx(const Fortran::semantics::Symbol &sym) { return kDefaultAllocator; } +void initializeDeviceComponentAllocator( + Fortran::lower::AbstractConverter &converter, + const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box); + +mlir::Type gatherDeviceComponentCoordinatesAndType( + fir::FirOpBuilder &builder, mlir::Location loc, + const Fortran::semantics::Symbol &sym, fir::RecordType recTy, + llvm::SmallVector &coordinates); + +/// Translate the CUDA Fortran attributes of \p sym into the FIR CUDA attribute +/// representation. +cuf::DataAttributeAttr +translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext, + const Fortran::semantics::Symbol &sym); + } // end namespace Fortran::lower #endif // FORTRAN_LOWER_CUDA_H diff --git a/flang/include/flang/Lower/ConvertVariable.h b/flang/include/flang/Lower/ConvertVariable.h index e05625a229ac7..b938f6be196af 100644 --- a/flang/include/flang/Lower/ConvertVariable.h +++ b/flang/include/flang/Lower/ConvertVariable.h @@ -162,12 +162,6 @@ translateSymbolAttributes(mlir::MLIRContext *mlirContext, fir::FortranVariableFlagsEnum extraFlags = fir::FortranVariableFlagsEnum::None); -/// Translate the CUDA Fortran attributes of \p sym into the FIR CUDA attribute -/// representation. -cuf::DataAttributeAttr -translateSymbolCUFDataAttribute(mlir::MLIRContext *mlirContext, - const Fortran::semantics::Symbol &sym); - /// Map a symbol to a given fir::ExtendedValue. This will generate an /// hlfir.declare when lowering to HLFIR and map the hlfir.declare result to the /// symbol. diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 219f9205f45d5..ce9d8944387e1 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -13,9 +13,9 @@ #include "flang/Lower/Allocatable.h" #include "flang/Evaluate/tools.h" #include "flang/Lower/AbstractConverter.h" +#include "flang/Lower/CUDA.h" #include "flang/Lower/ConvertType.h" #include "flang/Lower/ConvertVariable.h" -#include "flang/Lower/Cuda.h" #include "flang/Lower/IterationSpace.h" #include "flang/Lower/Mangler.h" #include "flang/Lower/OpenACC.h" @@ -445,10 +445,14 @@ class AllocateStmtHelper { /*mustBeHeap=*/true); } - void postAllocationAction(const Allocation &alloc) { + void postAllocationAction(const Allocation &alloc, + const fir::MutableBoxValue &box) { if (alloc.getSymbol().test(Fortran::semantics::Symbol::Flag::AccDeclare)) Fortran::lower::attachDeclarePostAllocAction(converter, builder, alloc.getSymbol()); + if (Fortran::semantics::HasCUDAComponent(alloc.getSymbol())) + Fortran::lower::initializeDeviceComponentAllocator( + converter, alloc.getSymbol(), box); } void setPinnedToFalse() { @@ -481,7 +485,7 @@ class AllocateStmtHelper { // Pointers must use PointerAllocate so that their deallocations // can be validated. genInlinedAllocation(alloc, box); - postAllocationAction(alloc); + postAllocationAction(alloc, box); setPinnedToFalse(); return; } @@ -504,7 +508,7 @@ class AllocateStmtHelper { genCudaAllocate(builder, loc, box, errorManager, alloc.getSymbol()); } fir::factory::syncMutableBoxFromIRBox(builder, loc, box); - postAllocationAction(alloc); + postAllocationAction(alloc, box); errorManager.assignStat(builder, loc, stat); } @@ -647,7 +651,7 @@ class AllocateStmtHelper { setPinnedToFalse(); } fir::factory::syncMutableBoxFromIRBox(builder, loc, box); - postAllocationAction(alloc); + postAllocationAction(alloc, box); errorManager.assignStat(builder, loc, stat); } diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 6b7efe6b57db3..1bad46f80612b 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -13,6 +13,7 @@ #include "flang/Lower/Bridge.h" #include "flang/Lower/Allocatable.h" +#include "flang/Lower/CUDA.h" #include "flang/Lower/CallInterface.h" #include "flang/Lower/Coarray.h" #include "flang/Lower/ConvertCall.h" @@ -20,7 +21,6 @@ #include "flang/Lower/ConvertExprToHLFIR.h" #include "flang/Lower/ConvertType.h" #include "flang/Lower/ConvertVariable.h" -#include "flang/Lower/Cuda.h" #include "flang/Lower/DirectivesCommon.h" #include "flang/Lower/HostAssociations.h" #include "flang/Lower/IO.h" diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt index 8e20abf0e9f2d..1d1c7ddda8e9b 100644 --- a/flang/lib/Lower/CMakeLists.txt +++ b/flang/lib/Lower/CMakeLists.txt @@ -15,6 +15,7 @@ add_flang_library(FortranLower ConvertProcedureDesignator.cpp ConvertType.cpp ConvertVariable.cpp + CUDA.cpp CustomIntrinsicCall.cpp HlfirIntrinsics.cpp HostAssociations.cpp diff --git a/flang/lib/Lower/CUDA.cpp b/flang/lib/Lower/CUDA.cpp new file mode 100644 index 0000000000000..31147c5c9e563 --- /dev/null +++ b/flang/lib/Lower/CUDA.cpp @@ -0,0 +1,155 @@ +//===-- CUDA.cpp -- CUDA Fortran specific lowering ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ +// +//===----------------------------------------------------------------------===// + +#include "flang/Lower/CUDA.h" + +#define DEBUG_TYPE "flang-lower-cuda" + +void Fortran::lower::initializeDeviceComponentAllocator( + Fortran::lower::AbstractConverter &converter, + const Fortran::semantics::Symbol &sym, const fir::MutableBoxValue &box) { + if (const auto *details{ + sym.GetUltimate() + .detailsIf()}) { + const Fortran::semantics::DeclTypeSpec *type{details->type()}; + const Fortran::semantics::DerivedTypeSpec *derived{type ? type->AsDerived() + : nullptr}; + if (derived) { + if (!FindCUDADeviceAllocatableUltimateComponent(*derived)) + return; // No device components. + + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + mlir::Location loc = converter.getCurrentLocation(); + + mlir::Type baseTy = fir::unwrapRefType(box.getAddr().getType()); + + // Only pointer and allocatable needs post allocation initialization + // of components descriptors. + if (!fir::isAllocatableType(baseTy) && !fir::isPointerType(baseTy)) + return; + + // Extract the derived type. + mlir::Type ty = fir::getDerivedType(baseTy); + auto recTy = mlir::dyn_cast(ty); + assert(recTy && "expected fir::RecordType"); + + if (auto boxTy = mlir::dyn_cast(baseTy)) + baseTy = boxTy.getEleTy(); + baseTy = fir::unwrapRefType(baseTy); + + Fortran::semantics::UltimateComponentIterator components{*derived}; + mlir::Value loadedBox = fir::LoadOp::create(builder, loc, box.getAddr()); + mlir::Value addr; + if (auto seqTy = mlir::dyn_cast(baseTy)) { + mlir::Type idxTy = builder.getIndexType(); + mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); + mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); + llvm::SmallVector loops; + llvm::SmallVector indices; + llvm::SmallVector extents; + for (unsigned i = 0; i < seqTy.getDimension(); ++i) { + mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); + auto dimInfo = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy, + idxTy, loadedBox, dim); + mlir::Value lbub = mlir::arith::AddIOp::create( + builder, loc, dimInfo.getResult(0), dimInfo.getResult(1)); + mlir::Value ext = + mlir::arith::SubIOp::create(builder, loc, lbub, one); + mlir::Value cmp = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::sgt, ext, zero); + ext = mlir::arith::SelectOp::create(builder, loc, cmp, ext, zero); + extents.push_back(ext); + + auto loop = fir::DoLoopOp::create( + builder, loc, dimInfo.getResult(0), dimInfo.getResult(1), + dimInfo.getResult(2), /*isUnordered=*/true, + /*finalCount=*/false, mlir::ValueRange{}); + loops.push_back(loop); + indices.push_back(loop.getInductionVar()); + builder.setInsertionPointToStart(loop.getBody()); + } + mlir::Value boxAddr = fir::BoxAddrOp::create(builder, loc, loadedBox); + auto shape = fir::ShapeOp::create(builder, loc, extents); + addr = fir::ArrayCoorOp::create( + builder, loc, fir::ReferenceType::get(recTy), boxAddr, shape, + /*slice=*/mlir::Value{}, indices, /*typeparms=*/mlir::ValueRange{}); + } else { + addr = fir::BoxAddrOp::create(builder, loc, loadedBox); + } + for (const auto &compSym : components) { + if (Fortran::semantics::IsDeviceAllocatable(compSym)) { + llvm::SmallVector coord; + mlir::Type fieldTy = gatherDeviceComponentCoordinatesAndType( + builder, loc, compSym, recTy, coord); + assert(coord.size() == 1 && "expect one coordinate"); + mlir::Value comp = fir::CoordinateOp::create( + builder, loc, builder.getRefType(fieldTy), addr, coord[0]); + cuf::DataAttributeAttr dataAttr = + Fortran::lower::translateSymbolCUFDataAttribute( + builder.getContext(), compSym); + cuf::SetAllocatorIndexOp::create(builder, loc, comp, dataAttr); + } + } + } + } +} + +mlir::Type Fortran::lower::gatherDeviceComponentCoordinatesAndType( + fir::FirOpBuilder &builder, mlir::Location loc, + const Fortran::semantics::Symbol &sym, fir::RecordType recTy, + llvm::SmallVector &coordinates) { + unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString()); + mlir::Type fieldTy; + if (fieldIdx != std::numeric_limits::max()) { + // Field found in the base record type. + auto fieldName = recTy.getTypeList()[fieldIdx].first; + fieldTy = recTy.getTypeList()[fieldIdx].second; + mlir::Value fieldIndex = fir::FieldIndexOp::create( + builder, loc, fir::FieldType::get(fieldTy.getContext()), fieldName, + recTy, + /*typeParams=*/mlir::ValueRange{}); + coordinates.push_back(fieldIndex); + } else { + // Field not found in base record type, search in potential + // record type components. + for (auto component : recTy.getTypeList()) { + if (auto childRecTy = mlir::dyn_cast(component.second)) { + fieldIdx = childRecTy.getFieldIndex(sym.name().ToString()); + if (fieldIdx != std::numeric_limits::max()) { + mlir::Value parentFieldIndex = fir::FieldIndexOp::create( + builder, loc, fir::FieldType::get(childRecTy.getContext()), + component.first, recTy, + /*typeParams=*/mlir::ValueRange{}); + coordinates.push_back(parentFieldIndex); + auto fieldName = childRecTy.getTypeList()[fieldIdx].first; + fieldTy = childRecTy.getTypeList()[fieldIdx].second; + mlir::Value childFieldIndex = fir::FieldIndexOp::create( + builder, loc, fir::FieldType::get(fieldTy.getContext()), + fieldName, childRecTy, + /*typeParams=*/mlir::ValueRange{}); + coordinates.push_back(childFieldIndex); + break; + } + } + } + } + if (coordinates.empty()) + TODO(loc, "device resident component in complex derived-type hierarchy"); + return fieldTy; +} + +cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute( + mlir::MLIRContext *mlirContext, const Fortran::semantics::Symbol &sym) { + std::optional cudaAttr = + Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate()); + return cuf::getDataAttribute(mlirContext, cudaAttr); +} diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index a4a8a697e02ae..fd66592bc285b 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -14,12 +14,12 @@ #include "flang/Lower/AbstractConverter.h" #include "flang/Lower/Allocatable.h" #include "flang/Lower/BoxAnalyzer.h" +#include "flang/Lower/CUDA.h" #include "flang/Lower/CallInterface.h" #include "flang/Lower/ConvertConstant.h" #include "flang/Lower/ConvertExpr.h" #include "flang/Lower/ConvertExprToHLFIR.h" #include "flang/Lower/ConvertProcedureDesignator.h" -#include "flang/Lower/Cuda.h" #include "flang/Lower/Mangler.h" #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/StatementContext.h" @@ -814,81 +814,24 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter, baseTy = boxTy.getEleTy(); baseTy = fir::unwrapRefType(baseTy); - if (mlir::isa(baseTy) && - (fir::isAllocatableType(fir::getBase(exv).getType()) || - fir::isPointerType(fir::getBase(exv).getType()))) + if (fir::isAllocatableType(fir::getBase(exv).getType()) || + fir::isPointerType(fir::getBase(exv).getType())) return; // Allocator index need to be set after allocation. auto recTy = mlir::dyn_cast(fir::unwrapSequenceType(baseTy)); assert(recTy && "expected fir::RecordType"); - llvm::SmallVector coordinates; Fortran::semantics::UltimateComponentIterator components{*derived}; for (const auto &sym : components) { if (Fortran::semantics::IsDeviceAllocatable(sym)) { - unsigned fieldIdx = recTy.getFieldIndex(sym.name().ToString()); - mlir::Type fieldTy; - llvm::SmallVector coordinates; - - if (fieldIdx != std::numeric_limits::max()) { - // Field found in the base record type. - auto fieldName = recTy.getTypeList()[fieldIdx].first; - fieldTy = recTy.getTypeList()[fieldIdx].second; - mlir::Value fieldIndex = fir::FieldIndexOp::create( - builder, loc, fir::FieldType::get(fieldTy.getContext()), - fieldName, recTy, - /*typeParams=*/mlir::ValueRange{}); - coordinates.push_back(fieldIndex); - } else { - // Field not found in base record type, search in potential - // record type components. - for (auto component : recTy.getTypeList()) { - if (auto childRecTy = - mlir::dyn_cast(component.second)) { - fieldIdx = childRecTy.getFieldIndex(sym.name().ToString()); - if (fieldIdx != std::numeric_limits::max()) { - mlir::Value parentFieldIndex = fir::FieldIndexOp::create( - builder, loc, - fir::FieldType::get(childRecTy.getContext()), - component.first, recTy, - /*typeParams=*/mlir::ValueRange{}); - coordinates.push_back(parentFieldIndex); - auto fieldName = childRecTy.getTypeList()[fieldIdx].first; - fieldTy = childRecTy.getTypeList()[fieldIdx].second; - mlir::Value childFieldIndex = fir::FieldIndexOp::create( - builder, loc, fir::FieldType::get(fieldTy.getContext()), - fieldName, childRecTy, - /*typeParams=*/mlir::ValueRange{}); - coordinates.push_back(childFieldIndex); - break; - } - } - } - } - - if (coordinates.empty()) - TODO(loc, "device resident component in complex derived-type " - "hierarchy"); - + llvm::SmallVector coord; + mlir::Type fieldTy = + Fortran::lower::gatherDeviceComponentCoordinatesAndType( + builder, loc, sym, recTy, coord); mlir::Value base = fir::getBase(exv); - mlir::Value comp; - if (mlir::isa(fir::unwrapRefType(base.getType()))) { - mlir::Value box = fir::LoadOp::create(builder, loc, base); - mlir::Value addr = fir::BoxAddrOp::create(builder, loc, box); - llvm::SmallVector lenParams; - assert(coordinates.size() == 1 && "expect one coordinate"); - auto field = mlir::dyn_cast( - coordinates[0].getDefiningOp()); - comp = hlfir::DesignateOp::create( - builder, loc, builder.getRefType(fieldTy), addr, - /*component=*/field.getFieldName(), - /*componentShape=*/mlir::Value{}, - hlfir::DesignateOp::Subscripts{}); - } else { - comp = fir::CoordinateOp::create( - builder, loc, builder.getRefType(fieldTy), base, coordinates); - } + mlir::Value comp = fir::CoordinateOp::create( + builder, loc, builder.getRefType(fieldTy), base, coord); cuf::DataAttributeAttr dataAttr = Fortran::lower::translateSymbolCUFDataAttribute( builder.getContext(), sym); @@ -1950,13 +1893,6 @@ fir::FortranVariableFlagsAttr Fortran::lower::translateSymbolAttributes( return fir::FortranVariableFlagsAttr::get(mlirContext, flags); } -cuf::DataAttributeAttr Fortran::lower::translateSymbolCUFDataAttribute( - mlir::MLIRContext *mlirContext, const Fortran::semantics::Symbol &sym) { - std::optional cudaAttr = - Fortran::semantics::GetCUDADataAttr(&sym.GetUltimate()); - return cuf::getDataAttribute(mlirContext, cudaAttr); -} - static bool isCapturedInInternalProcedure(Fortran::lower::AbstractConverter &converter, const Fortran::semantics::Symbol &sym) { diff --git a/flang/test/Lower/CUDA/cuda-set-allocator.cuf b/flang/test/Lower/CUDA/cuda-set-allocator.cuf index e3bb181f65398..d783f340fe9a4 100644 --- a/flang/test/Lower/CUDA/cuda-set-allocator.cuf +++ b/flang/test/Lower/CUDA/cuda-set-allocator.cuf @@ -23,34 +23,44 @@ contains subroutine sub2() type(ty_device), pointer :: d1 + allocate(d1) end subroutine ! CHECK-LABEL: func.func @_QMm1Psub2() ! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box>>,y:i32,z:!fir.box>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda, uniq_name = "_QMm1Fsub2Ed1"} -> !fir.ref>>,y:i32,z:!fir.box>>}>>>> ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMm1Fsub2Ed1"} : (!fir.ref>>,y:i32,z:!fir.box>>}>>>>) -> (!fir.ref>>,y:i32,z:!fir.box>>}>>>>, !fir.ref>>,y:i32,z:!fir.box>>}>>>>) -! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> -! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.ptr>>,y:i32,z:!fir.box>>}>> -! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.ptr>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref>>> {data_attr = #cuf.cuda} -! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> -! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.ptr>>,y:i32,z:!fir.box>>}>> -! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.ptr>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref>>> {data_attr = #cuf.cuda} +! CHECK: cuf.allocate +! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.ptr>>,y:i32,z:!fir.box>>}>> +! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.ptr>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> +! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref>>> {data_attr = #cuf.cuda} +! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.ptr>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> +! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref>>> {data_attr = #cuf.cuda} subroutine sub3() type(ty_device), allocatable :: d1 + allocate(d1) end subroutine ! CHECK-LABEL: func.func @_QMm1Psub3() ! CHECK: %[[ALLOC:.*]] = cuf.alloc !fir.box>>,y:i32,z:!fir.box>>}>>> {bindc_name = "d1", data_attr = #cuf.cuda, uniq_name = "_QMm1Fsub3Ed1"} -> !fir.ref>>,y:i32,z:!fir.box>>}>>>> ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOC]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMm1Fsub3Ed1"} : (!fir.ref>>,y:i32,z:!fir.box>>}>>>>) -> (!fir.ref>>,y:i32,z:!fir.box>>}>>>>, !fir.ref>>,y:i32,z:!fir.box>>}>>>>) -! CHECK: %[[LOAD1:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> -! CHECK: %[[ADDR1:.*]] = fir.box_addr %[[LOAD1]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.heap>>,y:i32,z:!fir.box>>}>> -! CHECK: %[[DESIGNATE1:.*]] = hlfir.designate %[[ADDR1]]{"x"} : (!fir.heap>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[DESIGNATE1]] : !fir.ref>>> {data_attr = #cuf.cuda} -! CHECK: %[[LOAD2:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> -! CHECK: %[[ADDR2:.*]] = fir.box_addr %[[LOAD2]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.heap>>,y:i32,z:!fir.box>>}>> -! CHECK: %[[DESIGNATE2:.*]] = hlfir.designate %[[ADDR2]]{"z"} : (!fir.heap>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> -! CHECK: cuf.set_allocator_idx %[[DESIGNATE2]] : !fir.ref>>> {data_attr = #cuf.cuda} +! CHECK: cuf.allocate +! CHECK: %[[LOAD:.*]] = fir.load %[[DECL]]#0 : !fir.ref>>,y:i32,z:!fir.box>>}>>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>>,y:i32,z:!fir.box>>}>>>) -> !fir.heap>>,y:i32,z:!fir.box>>}>> +! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ADDR]], x : (!fir.heap>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> +! CHECK: cuf.set_allocator_idx %[[COORD1]] : !fir.ref>>> {data_attr = #cuf.cuda} +! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ADDR]], z : (!fir.heap>>,y:i32,z:!fir.box>>}>>) -> !fir.ref>>> +! CHECK: cuf.set_allocator_idx %[[COORD2]] : !fir.ref>>> {data_attr = #cuf.cuda} + + subroutine sub4() + type(ty_device), allocatable :: d1(:,:) + allocate(d1(10, 10)) + end subroutine + +! CHECK-LABEL: func.func @_QMm1Psub4() +! CHECK: cuf.allocate +! CHECK-COUNT-2: fir.do_loop +! CHECK-COUNT-2: cuf.set_allocator_idx end module