From 2a8d78189a6c20a482dd44dfe43f8c919d0e53d6 Mon Sep 17 00:00:00 2001 From: Jean Perier Date: Fri, 16 May 2025 01:44:31 -0700 Subject: [PATCH 1/4] [flang] use DataLayout instead of GEP to compute element size --- .../flang/Optimizer/CodeGen/FIROpPatterns.h | 4 ++ flang/lib/Optimizer/CodeGen/CodeGen.cpp | 50 ++++++++--------- flang/test/Fir/convert-to-llvm.fir | 54 +++++-------------- flang/test/Fir/copy-codegen.fir | 12 ++--- flang/test/Fir/embox-char.fir | 8 +-- flang/test/Fir/embox-substring.fir | 7 ++- 6 files changed, 48 insertions(+), 87 deletions(-) diff --git a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h index 53d16323beddf..7b1c14e4dfdc9 100644 --- a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h +++ b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h @@ -173,6 +173,10 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern { this->getTypeConverter()); } + const mlir::DataLayout &getDataLayout() const { + return lowerTy().getDataLayout(); + } + void attachTBAATag(mlir::LLVM::AliasAnalysisOpInterface op, mlir::Type baseFIRType, mlir::Type accessFIRType, mlir::LLVM::GEPOp gep) const { diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index e534cfa5591c6..ad9119ba4a031 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -1043,22 +1043,12 @@ static mlir::SymbolRefAttr getMalloc(fir::AllocMemOp op, static mlir::Value computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType, mlir::Type idxTy, - mlir::ConversionPatternRewriter &rewriter) { - // Note that we cannot use something like - // mlir::LLVM::getPrimitiveTypeSizeInBits() for the element type here. For - // example, it returns 10 bytes for mlir::Float80Type for targets where it - // occupies 16 bytes. Proper solution is probably to use - // mlir::DataLayout::getTypeABIAlignment(), but DataLayout is not being set - // yet (see llvm-project#57230). For the time being use the '(intptr_t)((type - // *)0 + 1)' trick for all types. The generated instructions are optimized - // into constant by the first pass of InstCombine, so it should not be a - // performance issue. - auto llvmPtrTy = ::getLlvmPtrType(llvmObjectType.getContext()); - auto nullPtr = rewriter.create(loc, llvmPtrTy); - auto gep = rewriter.create( - loc, llvmPtrTy, llvmObjectType, nullPtr, - llvm::ArrayRef{1}); - return rewriter.create(loc, idxTy, gep); + mlir::ConversionPatternRewriter &rewriter, + const mlir::DataLayout &dataLayout) { + llvm::TypeSize size = dataLayout.getTypeSize(llvmObjectType); + unsigned short alignment = dataLayout.getTypeABIAlignment(llvmObjectType); + std::int64_t distance = llvm::alignTo(size, alignment); + return genConstantIndex(loc, idxTy, rewriter, distance); } /// Return value of the stride in bytes between adjacent elements @@ -1066,10 +1056,10 @@ computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType, /// \p idxTy integer type. static mlir::Value genTypeStrideInBytes(mlir::Location loc, mlir::Type idxTy, - mlir::ConversionPatternRewriter &rewriter, - mlir::Type llTy) { + mlir::ConversionPatternRewriter &rewriter, mlir::Type llTy, + const mlir::DataLayout &dataLayout) { // Create a pointer type and use computeElementDistance(). - return computeElementDistance(loc, llTy, idxTy, rewriter); + return computeElementDistance(loc, llTy, idxTy, rewriter, dataLayout); } namespace { @@ -1111,7 +1101,7 @@ struct AllocMemOpConversion : public fir::FIROpConversion { mlir::Value genTypeSizeInBytes(mlir::Location loc, mlir::Type idxTy, mlir::ConversionPatternRewriter &rewriter, mlir::Type llTy) const { - return computeElementDistance(loc, llTy, idxTy, rewriter); + return computeElementDistance(loc, llTy, idxTy, rewriter, getDataLayout()); } }; } // namespace @@ -1323,8 +1313,8 @@ struct EmboxCommonConversion : public fir::FIROpConversion { fir::CharacterType charTy, mlir::ValueRange lenParams) const { auto i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64); - mlir::Value size = - genTypeStrideInBytes(loc, i64Ty, rewriter, this->convertType(charTy)); + mlir::Value size = genTypeStrideInBytes( + loc, i64Ty, rewriter, this->convertType(charTy), this->getDataLayout()); if (charTy.hasConstantLen()) return size; // Length accounted for in the genTypeStrideInBytes GEP. // Otherwise, multiply the single character size by the length. @@ -1338,6 +1328,7 @@ struct EmboxCommonConversion : public fir::FIROpConversion { std::tuple getSizeAndTypeCode( mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, mlir::Type boxEleTy, mlir::ValueRange lenParams = {}) const { + const mlir::DataLayout &dataLayout = this->getDataLayout(); auto i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64); if (auto eleTy = fir::dyn_cast_ptrEleTy(boxEleTy)) boxEleTy = eleTy; @@ -1354,18 +1345,19 @@ struct EmboxCommonConversion : public fir::FIROpConversion { mlir::dyn_cast(boxEleTy) || fir::isa_real(boxEleTy) || fir::isa_complex(boxEleTy)) return {genTypeStrideInBytes(loc, i64Ty, rewriter, - this->convertType(boxEleTy)), + this->convertType(boxEleTy), dataLayout), typeCodeVal}; if (auto charTy = mlir::dyn_cast(boxEleTy)) return {getCharacterByteSize(loc, rewriter, charTy, lenParams), typeCodeVal}; if (fir::isa_ref_type(boxEleTy)) { auto ptrTy = ::getLlvmPtrType(rewriter.getContext()); - return {genTypeStrideInBytes(loc, i64Ty, rewriter, ptrTy), typeCodeVal}; + return {genTypeStrideInBytes(loc, i64Ty, rewriter, ptrTy, dataLayout), + typeCodeVal}; } if (mlir::isa(boxEleTy)) return {genTypeStrideInBytes(loc, i64Ty, rewriter, - this->convertType(boxEleTy)), + this->convertType(boxEleTy), dataLayout), typeCodeVal}; fir::emitFatalError(loc, "unhandled type in fir.box code generation"); } @@ -1909,8 +1901,8 @@ struct XEmboxOpConversion : public EmboxCommonConversion { if (hasSubcomp) { // We have a subcomponent. The step value needs to be the number of // bytes per element (which is a derived type). - prevDimByteStride = - genTypeStrideInBytes(loc, i64Ty, rewriter, convertType(seqEleTy)); + prevDimByteStride = genTypeStrideInBytes( + loc, i64Ty, rewriter, convertType(seqEleTy), getDataLayout()); } else if (hasSubstr) { // We have a substring. The step value needs to be the number of bytes // per CHARACTER element. @@ -3604,8 +3596,8 @@ struct CopyOpConversion : public fir::FIROpConversion { mlir::Value llvmDestination = adaptor.getDestination(); mlir::Type i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64); mlir::Type copyTy = fir::unwrapRefType(copy.getSource().getType()); - mlir::Value copySize = - genTypeStrideInBytes(loc, i64Ty, rewriter, convertType(copyTy)); + mlir::Value copySize = genTypeStrideInBytes( + loc, i64Ty, rewriter, convertType(copyTy), getDataLayout()); mlir::LLVM::AliasAnalysisOpInterface newOp; if (copy.getNoOverlap()) diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index 2960528fb6c24..6d8a8bb606b90 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -216,9 +216,7 @@ func.func @test_alloc_and_freemem_one() { } // CHECK-LABEL: llvm.func @test_alloc_and_freemem_one() { -// CHECK-NEXT: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK-NEXT: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] -// CHECK-NEXT: %[[N:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 +// CHECK: %[[N:.*]] = llvm.mlir.constant(4 : i64) : i64 // CHECK-NEXT: llvm.call @malloc(%[[N]]) // CHECK: llvm.call @free(%{{.*}}) // CHECK-NEXT: llvm.return @@ -235,10 +233,8 @@ func.func @test_alloc_and_freemem_several() { } // CHECK-LABEL: llvm.func @test_alloc_and_freemem_several() { -// CHECK: [[NULL:%.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: [[PTR:%.*]] = llvm.getelementptr [[NULL]][{{.*}}] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<100 x f32> -// CHECK: [[N:%.*]] = llvm.ptrtoint [[PTR]] : !llvm.ptr to i64 -// CHECK: [[MALLOC:%.*]] = llvm.call @malloc([[N]]) +// CHECK: %[[N:.*]] = llvm.mlir.constant(400 : i64) : i64 +// CHECK: [[MALLOC:%.*]] = llvm.call @malloc(%[[N]]) // CHECK: llvm.call @free([[MALLOC]]) // CHECK: llvm.return @@ -251,9 +247,7 @@ func.func @test_with_shape(%ncols: index, %nrows: index) { // CHECK-LABEL: llvm.func @test_with_shape // CHECK-SAME: %[[NCOLS:.*]]: i64, %[[NROWS:.*]]: i64 -// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] -// CHECK: %[[FOUR:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 +// CHECK: %[[FOUR:.*]] = llvm.mlir.constant(4 : i64) : i64 // CHECK: %[[DIM1_SIZE:.*]] = llvm.mul %[[FOUR]], %[[NCOLS]] : i64 // CHECK: %[[TOTAL_SIZE:.*]] = llvm.mul %[[DIM1_SIZE]], %[[NROWS]] : i64 // CHECK: %[[MEM:.*]] = llvm.call @malloc(%[[TOTAL_SIZE]]) @@ -269,9 +263,7 @@ func.func @test_string_with_shape(%len: index, %nelems: index) { // CHECK-LABEL: llvm.func @test_string_with_shape // CHECK-SAME: %[[LEN:.*]]: i64, %[[NELEMS:.*]]: i64) -// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] -// CHECK: %[[ONE:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 +// CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[LEN_SIZE:.*]] = llvm.mul %[[ONE]], %[[LEN]] : i64 // CHECK: %[[TOTAL_SIZE:.*]] = llvm.mul %[[LEN_SIZE]], %[[NELEMS]] : i64 // CHECK: %[[MEM:.*]] = llvm.call @malloc(%[[TOTAL_SIZE]]) @@ -1654,9 +1646,7 @@ func.func @embox0(%arg0: !fir.ref>) { // AMDGPU: %[[AA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5> // AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(9 : i32) : i32 -// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] -// CHECK: %[[I64_ELEM_SIZE:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 +// CHECK: %[[I64_ELEM_SIZE:.*]] = llvm.mlir.constant(4 : i64) : i64 // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> // CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[I64_ELEM_SIZE]], %[[DESC]][1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> // CHECK: %[[CFI_VERSION:.*]] = llvm.mlir.constant(20240719 : i32) : i32 @@ -1879,9 +1869,7 @@ func.func @xembox0(%arg0: !fir.ref>) { // AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64 // CHECK: %[[TYPE:.*]] = llvm.mlir.constant(9 : i32) : i32 -// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] -// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 +// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.mlir.constant(4 : i64) : i64 // CHECK: %[[BOX0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> // CHECK: %[[BOX1:.*]] = llvm.insertvalue %[[ELEM_LEN_I64]], %[[BOX0]][1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> // CHECK: %[[VERSION:.*]] = llvm.mlir.constant(20240719 : i32) : i32 @@ -1933,9 +1921,7 @@ func.func @xembox0_i32(%arg0: !fir.ref>) { // CHECK: %[[C0_I32:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64 // CHECK: %[[TYPE:.*]] = llvm.mlir.constant(9 : i32) : i32 -// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] -// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 +// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.mlir.constant(4 : i64) : i64 // CHECK: %[[BOX0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> // CHECK: %[[BOX1:.*]] = llvm.insertvalue %[[ELEM_LEN_I64]], %[[BOX0]][1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> // CHECK: %[[VERSION:.*]] = llvm.mlir.constant(20240719 : i32) : i32 @@ -1988,9 +1974,7 @@ func.func @xembox1(%arg0: !fir.ref>>) { // CHECK-LABEL: llvm.func @xembox1(%{{.*}}: !llvm.ptr) { // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64 -// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] -// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 +// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.mlir.constant(10 : i64) : i64 // CHECK: %{{.*}} = llvm.insertvalue %[[ELEM_LEN_I64]], %{{.*}}[1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> // CHECK: %[[PREV_PTROFF:.*]] = llvm.mul %[[ELEM_LEN_I64]], %[[C0]] : i64 @@ -2042,9 +2026,7 @@ func.func private @_QPxb(!fir.box>) // AMDGPU: %[[AR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr"} : (i64) -> !llvm.ptr<5> // AMDGPU: %[[ARR:.*]] = llvm.addrspacecast %[[AR]] : !llvm.ptr<5> to !llvm.ptr // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(28 : i32) : i32 -// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] -// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 +// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.mlir.constant(8 : i64) : i64 // CHECK: %[[BOX0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> // CHECK: %[[BOX1:.*]] = llvm.insertvalue %[[ELEM_LEN_I64]], %[[BOX0]][1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)> // CHECK: %[[VERSION:.*]] = llvm.mlir.constant(20240719 : i32) : i32 @@ -2126,9 +2108,7 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box>) // CHECK: %[[C10:.*]] = llvm.mlir.constant(10 : i64) : i64 // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64 // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(9 : i32) : i32 -// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] -// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 +// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.mlir.constant(4 : i64) : i64 // CHECK: %[[BOX0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> // CHECK: %[[BOX1:.*]] = llvm.insertvalue %[[ELEM_LEN_I64]], %[[BOX0]][1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> // CHECK: %[[VERSION:.*]] = llvm.mlir.constant(20240719 : i32) : i32 @@ -2146,9 +2126,7 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box>) // CHECK: %[[BOX6:.*]] = llvm.insertvalue %[[F18ADDENDUM_I8]], %[[BOX5]][6] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> // CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i64) : i64 // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[ELE_TYPE:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[GEP_DTYPE_SIZE:.*]] = llvm.getelementptr %[[ELE_TYPE]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_dt_sliceTt", (i32, i32)> -// CHECK: %[[PTRTOINT_DTYPE_SIZE:.*]] = llvm.ptrtoint %[[GEP_DTYPE_SIZE]] : !llvm.ptr to i64 +// CHECK: %[[PTRTOINT_DTYPE_SIZE:.*]] = llvm.mlir.constant(8 : i64) : i64 // CHECK: %[[ADJUSTED_OFFSET:.*]] = llvm.sub %[[C1]], %[[ONE]] : i64 // CHECK: %[[EXT_SUB:.*]] = llvm.sub %[[C10]], %[[C1]] : i64 // CHECK: %[[EXT_ADD:.*]] = llvm.add %[[EXT_SUB]], %[[C2]] : i64 @@ -2429,9 +2407,7 @@ func.func @test_rebox_1(%arg0: !fir.box>) { //CHECK: %[[SIX:.*]] = llvm.mlir.constant(6 : index) : i64 //CHECK: %[[EIGHTY:.*]] = llvm.mlir.constant(80 : index) : i64 //CHECK: %[[FLOAT_TYPE:.*]] = llvm.mlir.constant(27 : i32) : i32 -//CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr -//CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] -//CHECK: %[[ELEM_SIZE_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 +//CHECK: %[[ELEM_SIZE_I64:.*]] = llvm.mlir.constant(4 : i64) : i64 //CHECK: %[[EXTRA_GEP:.*]] = llvm.getelementptr %[[ARG0]][0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> //CHECK: %[[EXTRA:.*]] = llvm.load %[[EXTRA_GEP]] : !llvm.ptr -> i8 //CHECK: %[[RBOX:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> @@ -2504,9 +2480,7 @@ func.func @foo(%arg0: !fir.box} //CHECK: %[[COMPONENT_OFFSET_1:.*]] = llvm.mlir.constant(1 : i64) : i64 //CHECK: %[[ELEM_COUNT:.*]] = llvm.mlir.constant(7 : i64) : i64 //CHECK: %[[TYPE_CHAR:.*]] = llvm.mlir.constant(40 : i32) : i32 -//CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr -//CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] -//CHECK: %[[CHAR_SIZE:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64 +//CHECK: %[[CHAR_SIZE:.*]] = llvm.mlir.constant(1 : i64) : i64 //CHECK: %[[ELEM_SIZE:.*]] = llvm.mul %[[CHAR_SIZE]], %[[ELEM_COUNT]] //CHECK: %[[EXTRA_GEP:.*]] = llvm.getelementptr %[[ARG0]][0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr, array<1 x i64>)> //CHECK: %[[EXTRA:.*]] = llvm.load %[[EXTRA_GEP]] : !llvm.ptr -> i8 diff --git a/flang/test/Fir/copy-codegen.fir b/flang/test/Fir/copy-codegen.fir index eef1885c6a49c..7b0620ca2d312 100644 --- a/flang/test/Fir/copy-codegen.fir +++ b/flang/test/Fir/copy-codegen.fir @@ -12,10 +12,8 @@ func.func @test_copy_1(%arg0: !fir.ref, %arg1: !fir.ref) { // CHECK-LABEL: llvm.func @test_copy_1( // CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr, // CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) { -// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)> -// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64 -// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> () +// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(36 : i64) : i64 +// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_2]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> () // CHECK: llvm.return // CHECK: } @@ -26,10 +24,8 @@ func.func @test_copy_2(%arg0: !fir.ref, %arg1: !fir.ref) { // CHECK-LABEL: llvm.func @test_copy_2( // CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr, // CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) { -// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)> -// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64 -// CHECK: "llvm.intr.memmove"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> () +// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(36 : i64) : i64 +// CHECK: "llvm.intr.memmove"(%[[VAL_1]], %[[VAL_0]], %[[VAL_2]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> () // CHECK: llvm.return // CHECK: } } diff --git a/flang/test/Fir/embox-char.fir b/flang/test/Fir/embox-char.fir index efb069f96520d..8e40acfdf289f 100644 --- a/flang/test/Fir/embox-char.fir +++ b/flang/test/Fir/embox-char.fir @@ -45,9 +45,7 @@ // CHECK: %[[VAL_30:.*]] = llvm.load %[[VAL_29]] : !llvm.ptr -> i64 // CHECK: %[[VAL_31:.*]] = llvm.sdiv %[[VAL_16]], %[[VAL_13]] : i64 // CHECK: %[[VAL_32:.*]] = llvm.mlir.constant(44 : i32) : i32 -// CHECK: %[[VAL_33:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_34:.*]] = llvm.getelementptr %[[VAL_33]][1] : (!llvm.ptr) -> !llvm.ptr, i32 -// CHECK: %[[VAL_35:.*]] = llvm.ptrtoint %[[VAL_34]] : !llvm.ptr to i64 +// CHECK: %[[VAL_35:.*]] = llvm.mlir.constant(4 : i64) : i64 // CHECK: %[[VAL_36:.*]] = llvm.mul %[[VAL_35]], %[[VAL_31]] : i64 // CHECK: %[[VAL_37:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> // CHECK: %[[VAL_38:.*]] = llvm.insertvalue %[[VAL_36]], %[[VAL_37]][1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> @@ -139,9 +137,7 @@ func.func @test_char4(%arg0: !fir.ref !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> // CHECK: %[[VAL_29:.*]] = llvm.load %[[VAL_28]] : !llvm.ptr -> i64 // CHECK: %[[VAL_30:.*]] = llvm.mlir.constant(40 : i32) : i32 -// CHECK: %[[VAL_31:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_32:.*]] = llvm.getelementptr %[[VAL_31]][1] : (!llvm.ptr) -> !llvm.ptr, i8 -// CHECK: %[[VAL_33:.*]] = llvm.ptrtoint %[[VAL_32]] : !llvm.ptr to i64 +// CHECK: %[[VAL_33:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[VAL_34:.*]] = llvm.mul %[[VAL_33]], %[[VAL_15]] : i64 // CHECK: %[[VAL_35:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> // CHECK: %[[VAL_36:.*]] = llvm.insertvalue %[[VAL_34]], %[[VAL_35]][1] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> diff --git a/flang/test/Fir/embox-substring.fir b/flang/test/Fir/embox-substring.fir index f2042f9bda7fc..6ce6346f89b1d 100644 --- a/flang/test/Fir/embox-substring.fir +++ b/flang/test/Fir/embox-substring.fir @@ -29,10 +29,9 @@ func.func private @dump(!fir.box>>) // CHECK-SAME: %[[VAL_0:.*]]: !llvm.ptr, // CHECK-SAME: %[[VAL_1:.*]]: i64) { // CHECK: %[[VAL_5:.*]] = llvm.mlir.constant(1 : index) : i64 -// CHECK: llvm.getelementptr -// CHECK: %[[VAL_28:.*]] = llvm.mlir.zero : !llvm.ptr -// CHECK: %[[VAL_29:.*]] = llvm.getelementptr %[[VAL_28]][1] : (!llvm.ptr) -> !llvm.ptr, i8 -// CHECK: %[[VAL_30:.*]] = llvm.ptrtoint %[[VAL_29]] : !llvm.ptr to i64 +// CHECK: llvm.mlir.constant(1 : i64) : i64 +// CHECK: llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_30:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[VAL_31:.*]] = llvm.mul %[[VAL_30]], %[[VAL_1]] : i64 // CHECK: %[[VAL_42:.*]] = llvm.mul %[[VAL_31]], %[[VAL_5]] : i64 // CHECK: %[[VAL_43:.*]] = llvm.insertvalue %[[VAL_42]], %{{.*}}[7, 0, 2] : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> From d71c0b7f45582ece43016eb98367251e54e75280 Mon Sep 17 00:00:00 2001 From: Jean Perier Date: Fri, 16 May 2025 08:09:37 -0700 Subject: [PATCH 2/4] [flang] translate derived type array init to attribute if possible --- .../Optimizer/CodeGen/LLVMInsertChainFolder.h | 31 +++ .../include/flang/Optimizer/Dialect/FIROps.td | 5 + flang/lib/Optimizer/CodeGen/CMakeLists.txt | 1 + flang/lib/Optimizer/CodeGen/CodeGen.cpp | 51 +++-- .../CodeGen/LLVMInsertChainFolder.cpp | 204 ++++++++++++++++++ flang/lib/Optimizer/Dialect/FIROps.cpp | 15 ++ .../Fir/convert-and-fold-insert-on-range.fir | 33 +++ 7 files changed, 319 insertions(+), 21 deletions(-) create mode 100644 flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h create mode 100644 flang/lib/Optimizer/CodeGen/LLVMInsertChainFolder.cpp create mode 100644 flang/test/Fir/convert-and-fold-insert-on-range.fir diff --git a/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h b/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h new file mode 100644 index 0000000000000..d577c4c0fa70b --- /dev/null +++ b/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h @@ -0,0 +1,31 @@ +//===-- LLVMInsertChainFolder.h -- insertvalue chain folder ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Helper to fold LLVM dialect llvm.insertvalue chain representing constants +// into an Attribute representation. +// This sits in Flang because it is incomplete and tailored for flang needs. +// +//===----------------------------------------------------------------------===// + +namespace mlir { +class Attribute; +class OpBuilder; +class Value; +} // namespace mlir + +namespace fir { + +/// Attempt to fold an llvm.insertvalue chain into an attribute representation +/// suitable as llvm.constant operand. The returned value will be a null pointer +/// if this is not an llvm.insertvalue result pr if the chain is not a constant, +/// or cannot be represented as an Attribute. The operations are not deleted, +/// but some llvm.insertvalue value operands may be folded with the builder on +/// the way. +mlir::Attribute tryFoldingLLVMInsertChain(mlir::Value insertChainResult, + mlir::OpBuilder &builder); +} // namespace fir diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 458b780806144..dc66885f776f0 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -2129,6 +2129,11 @@ def fir_InsertOnRangeOp : fir_OneResultOp<"insert_on_range", [NoMemoryEffect]> { $seq `,` $val custom($coor) attr-dict `:` functional-type(operands, results) }]; + let extraClassDeclaration = [{ + /// Is this insert_on_range inserting on all the values of the result type? + bool isFullRange(); + }]; + let hasVerifier = 1; } diff --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt index 04480bac552b7..980307db315d9 100644 --- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt +++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt @@ -3,6 +3,7 @@ add_flang_library(FIRCodeGen CodeGen.cpp CodeGenOpenMP.cpp FIROpPatterns.cpp + LLVMInsertChainFolder.cpp LowerRepackArrays.cpp PreCGRewrite.cpp TBAABuilder.cpp diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index ad9119ba4a031..ed76a77ced047 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -14,6 +14,7 @@ #include "flang/Optimizer/CodeGen/CodeGenOpenMP.h" #include "flang/Optimizer/CodeGen/FIROpPatterns.h" +#include "flang/Optimizer/CodeGen/LLVMInsertChainFolder.h" #include "flang/Optimizer/CodeGen/TypeConverter.h" #include "flang/Optimizer/Dialect/FIRAttr.h" #include "flang/Optimizer/Dialect/FIRCG/CGOps.h" @@ -2412,15 +2413,38 @@ struct InsertOnRangeOpConversion doRewrite(fir::InsertOnRangeOp range, mlir::Type ty, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { - llvm::SmallVector dims; - auto type = adaptor.getOperands()[0].getType(); + auto arrayType = adaptor.getSeq().getType(); // Iteratively extract the array dimensions from the type. + llvm::SmallVector dims; + mlir::Type type = arrayType; while (auto t = mlir::dyn_cast(type)) { dims.push_back(t.getNumElements()); type = t.getElementType(); } + // Avoid generating long insert chain that are very slow to fold back + // (which is required in globals when later generating LLVM IR). Attempt to + // fold the inserted element value to an attribute and build an ArrayAttr + // for the resulting array. + if (range.isFullRange()) { + if (mlir::Attribute cst = + fir::tryFoldingLLVMInsertChain(adaptor.getVal(), rewriter)) { + mlir::Attribute dimVal = cst; + for (auto dim : llvm::reverse(dims)) { + // Use std::vector in case the number of elements is big. + std::vector elements(dim, dimVal); + dimVal = mlir::ArrayAttr::get(range.getContext(), elements); + } + // Replace insert chain with constant. + rewriter.replaceOpWithNewOp(range, arrayType, + dimVal); + return mlir::success(); + } + } + + // The inserted value cannot be folded to an attribute, turn the + // insert_range into an llvm.insertvalue chain. llvm::SmallVector lBounds; llvm::SmallVector uBounds; @@ -2434,8 +2458,8 @@ struct InsertOnRangeOpConversion auto &subscripts = lBounds; auto loc = range.getLoc(); - mlir::Value lastOp = adaptor.getOperands()[0]; - mlir::Value insertVal = adaptor.getOperands()[1]; + mlir::Value lastOp = adaptor.getSeq(); + mlir::Value insertVal = adaptor.getVal(); while (subscripts != uBounds) { lastOp = rewriter.create( @@ -3131,7 +3155,7 @@ struct GlobalOpConversion : public fir::FIROpConversion { // initialization is on the full range. auto insertOnRangeOps = gr.front().getOps(); for (auto insertOp : insertOnRangeOps) { - if (isFullRange(insertOp.getCoor(), insertOp.getType())) { + if (insertOp.isFullRange()) { auto seqTyAttr = convertType(insertOp.getType()); auto *op = insertOp.getVal().getDefiningOp(); auto constant = mlir::dyn_cast(op); @@ -3161,22 +3185,7 @@ struct GlobalOpConversion : public fir::FIROpConversion { return mlir::success(); } - bool isFullRange(mlir::DenseIntElementsAttr indexes, - fir::SequenceType seqTy) const { - auto extents = seqTy.getShape(); - if (indexes.size() / 2 != static_cast(extents.size())) - return false; - auto cur_index = indexes.value_begin(); - for (unsigned i = 0; i < indexes.size(); i += 2) { - if (*(cur_index++) != 0) - return false; - if (*(cur_index++) != extents[i / 2] - 1) - return false; - } - return true; - } - - // TODO: String comparaison should be avoided. Replace linkName with an + // TODO: String comparisons should be avoided. Replace linkName with an // enumeration. mlir::LLVM::Linkage convertLinkage(std::optional optLinkage) const { diff --git a/flang/lib/Optimizer/CodeGen/LLVMInsertChainFolder.cpp b/flang/lib/Optimizer/CodeGen/LLVMInsertChainFolder.cpp new file mode 100644 index 0000000000000..0fc8697b735cf --- /dev/null +++ b/flang/lib/Optimizer/CodeGen/LLVMInsertChainFolder.cpp @@ -0,0 +1,204 @@ +//===-- LLVMInsertChainFolder.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/CodeGen/LLVMInsertChainFolder.h" +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Builders.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "flang-insert-folder" + +#include + +namespace { +// Helper class to construct the attribute elements of an aggregate value being +// folded without creating a full mlir::Attribute representation for each step +// of the insert value chain, which would both be expensive in terms of +// compilation time and memory (since the intermediate Attribute would survive, +// unused, inside the mlir context). +class InsertChainBackwardFolder { + // Type for the current value of an element of the aggregate value being + // constructed by the insert chain. + // At any point of the insert chain, the value of an element is either: + // - nullptr: not yet known, the insert has not yet been seen. + // - an mlir::Attribute: the element is fully defined. + // - a nested InsertChainBackwardFolder: the element is itself an aggregate + // and its sub-elements have been partially defined (insert with mutliple + // indices have been seen). + + // The insertion folder assumes backward walk of the insert chain. Once an + // element or sub-element has been defined, it is not overriden by new + // insertions (last insert wins). + using InFlightValue = + llvm::PointerUnion; + +public: + InsertChainBackwardFolder( + mlir::Type type, std::deque *folderStorage) + : values(getNumElements(type), mlir::Attribute{}), + folderStorage{folderStorage}, type{type} {} + + /// Push + bool pushValue(mlir::Attribute val, llvm::ArrayRef at); + + mlir::Attribute finalize(mlir::Attribute defaultFieldValue); + +private: + static int64_t getNumElements(mlir::Type type) { + if (auto structTy = + llvm::dyn_cast_if_present(type)) + return structTy.getBody().size(); + if (auto arrayTy = + llvm::dyn_cast_if_present(type)) + return arrayTy.getNumElements(); + return 0; + } + + static mlir::Type getSubElementType(mlir::Type type, int64_t field) { + if (auto arrayTy = + llvm::dyn_cast_if_present(type)) + return arrayTy.getElementType(); + if (auto structTy = + llvm::dyn_cast_if_present(type)) + return structTy.getBody()[field]; + return {}; + } + + // Current element value of the aggregate value being built. + llvm::SmallVector values; + // std::deque is used to allocate storage for nested list and guarantee the + // stability of the InsertChainBackwardFolder* used as element value. + std::deque *folderStorage; + // Type of the aggregate value being built. + mlir::Type type; +}; +} // namespace + +// Helper to fold the value being inserted by an llvm.insert_value. +// This may call tryFoldingLLVMInsertChain if the value is an aggregate and +// was itself constructed by a different insert chain. +static mlir::Attribute getAttrIfConstant(mlir::Value val, + mlir::OpBuilder &rewriter) { + if (auto cst = val.getDefiningOp()) + return cst.getValue(); + if (auto insert = val.getDefiningOp()) + return fir::tryFoldingLLVMInsertChain(val, rewriter); + if (val.getDefiningOp()) + return mlir::LLVM::ZeroAttr::get(val.getContext()); + if (val.getDefiningOp()) + return mlir::LLVM::UndefAttr::get(val.getContext()); + if (mlir::Operation *op = val.getDefiningOp()) { + unsigned resNum = llvm::cast(val).getResultNumber(); + llvm::SmallVector results; + if (mlir::succeeded(rewriter.tryFold(op, results)) && + results.size() > resNum) { + if (auto cst = results[resNum].getDefiningOp()) + return cst.getValue(); + } + } + if (auto trunc = val.getDefiningOp()) + if (auto attr = getAttrIfConstant(trunc.getArg(), rewriter)) + if (auto intAttr = llvm::dyn_cast(attr)) + return mlir::IntegerAttr::get(trunc.getType(), intAttr.getInt()); + LLVM_DEBUG(llvm::dbgs() << "cannot fold insert value operand: " << val + << "\n"); + return {}; +} + +mlir::Attribute +InsertChainBackwardFolder::finalize(mlir::Attribute defaultFieldValue) { + std::vector attrs; + attrs.reserve(values.size()); + for (InFlightValue &inFlight : values) { + if (!inFlight) { + attrs.push_back(defaultFieldValue); + } else if (auto attr = llvm::dyn_cast(inFlight)) { + attrs.push_back(attr); + } else { + auto *inFlightList = llvm::cast(inFlight); + attrs.push_back(inFlightList->finalize(defaultFieldValue)); + } + } + return mlir::ArrayAttr::get(type.getContext(), attrs); +} + +bool InsertChainBackwardFolder::pushValue(mlir::Attribute val, + llvm::ArrayRef at) { + if (at.size() == 0 || at[0] >= static_cast(values.size())) + return false; + InFlightValue &inFlight = values[at[0]]; + if (!inFlight) { + if (at.size() == 1) { + inFlight = val; + return true; + } + // This is the first insert to a nested field. Create a + // InsertChainBackwardFolder for the current element value. + InsertChainBackwardFolder &inFlightList = folderStorage->emplace_back( + getSubElementType(type, at[0]), folderStorage); + inFlight = &inFlightList; + return inFlightList.pushValue(val, at.drop_front()); + } + // Keep last inserted value if already set. + if (llvm::isa(inFlight)) + return true; + auto *inFlightList = llvm::cast(inFlight); + if (at.size() == 1) { + if (!llvm::isa(val)) { + LLVM_DEBUG(llvm::dbgs() + << "insert chain sub-element partially overwritten initial " + "value is not zero or undef\n"); + return false; + } + inFlight = inFlightList->finalize(val); + return true; + } + return inFlightList->pushValue(val, at.drop_front()); +} + +mlir::Attribute fir::tryFoldingLLVMInsertChain(mlir::Value val, + mlir::OpBuilder &rewriter) { + if (auto cst = val.getDefiningOp()) + return cst.getValue(); + if (auto insert = val.getDefiningOp()) { + LLVM_DEBUG(llvm::dbgs() << "trying to fold insert chain:" << val << "\n"); + if (auto structTy = + llvm::dyn_cast(insert.getType())) { + mlir::LLVM::InsertValueOp currentInsert = insert; + mlir::LLVM::InsertValueOp lastInsert; + std::deque folderStorage; + InsertChainBackwardFolder inFlightList(structTy, &folderStorage); + while (currentInsert) { + mlir::Attribute attr = + getAttrIfConstant(currentInsert.getValue(), rewriter); + if (!attr) + return {}; + if (!inFlightList.pushValue(attr, currentInsert.getPosition())) + return {}; + lastInsert = currentInsert; + currentInsert = currentInsert.getContainer() + .getDefiningOp(); + } + mlir::Attribute defaultVal; + if (lastInsert) { + if (lastInsert.getContainer().getDefiningOp()) + defaultVal = mlir::LLVM::ZeroAttr::get(val.getContext()); + else if (lastInsert.getContainer().getDefiningOp()) + defaultVal = mlir::LLVM::UndefAttr::get(val.getContext()); + } + if (!defaultVal) { + LLVM_DEBUG(llvm::dbgs() + << "insert chain initial value is not Zero or Undef\n"); + return {}; + } + return inFlightList.finalize(defaultVal); + } + } + return {}; +} diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index d85b38c467857..e12af7782a578 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -2365,6 +2365,21 @@ llvm::LogicalResult fir::InsertOnRangeOp::verify() { return mlir::success(); } +bool fir::InsertOnRangeOp::isFullRange() { + auto extents = getType().getShape(); + mlir::DenseIntElementsAttr indexes = getCoor(); + if (indexes.size() / 2 != static_cast(extents.size())) + return false; + auto cur_index = indexes.value_begin(); + for (unsigned i = 0; i < indexes.size(); i += 2) { + if (*(cur_index++) != 0) + return false; + if (*(cur_index++) != extents[i / 2] - 1) + return false; + } + return true; +} + //===----------------------------------------------------------------------===// // InsertValueOp //===----------------------------------------------------------------------===// diff --git a/flang/test/Fir/convert-and-fold-insert-on-range.fir b/flang/test/Fir/convert-and-fold-insert-on-range.fir new file mode 100644 index 0000000000000..df18614d80b63 --- /dev/null +++ b/flang/test/Fir/convert-and-fold-insert-on-range.fir @@ -0,0 +1,33 @@ +// Test codegen of constant insert_on_range without symbol reference into mlir.constant. +// RUN: fir-opt --cg-rewrite --split-input-file --fir-to-llvm-ir %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu"} { + fir.global @derived_array : !fir.array<2x!fir.type>>}>> { + %c0 = arith.constant 0 : index + %0 = fir.undefined !fir.type>>}> + %1 = fir.zero_bits !fir.heap> + %2 = fir.shape %c0 : (index) -> !fir.shape<1> + %3 = fir.embox %1(%2) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> + %4 = fir.insert_value %0, %3, ["comp", !fir.type>>}>] : (!fir.type>>}>, !fir.box>>) -> !fir.type>>}> + %5 = fir.undefined !fir.array<2x!fir.type>>}>> + %6 = fir.insert_on_range %5, %4 from (0) to (1) : (!fir.array<2x!fir.type>>}>>, !fir.type>>}>) -> !fir.array<2x!fir.type>>}>> + fir.has_value %6 : !fir.array<2x!fir.type>>}>> + } +} + +//CHECK-LABEL: llvm.mlir.global external @derived_array() +//CHECK: %[[CST:.*]] = llvm.mlir.constant([ +//CHECK-SAME: [ +//CHECK-SAME: [#llvm.zero, 8, 20240719 : i32, 1 : i8, 28 : i8, 2 : i8, 0 : i8, +//CHECK-SAME: [ +//CHECK-SAME: [1, 0 : index, 8] +//CHECK-SAME: ] +//CHECK-SAME: ], +//CHECK-SAME: [ +//CHECK-SAME: [#llvm.zero, 8, 20240719 : i32, 1 : i8, 28 : i8, 2 : i8, 0 : i8, +//CHECK-SAME: [ +//CHECK-SAME: [1, 0 : index, 8] +//CHECK-SAME: ] +//CHECK-SAME: ]) : +//CHECK-SAME: !llvm.array<2 x struct<"sometype", (struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>)>> +//CHECK: llvm.return %[[CST]] : !llvm.array<2 x struct<"sometype", (struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>)>> From 796a1e0269baf1c77ffabf47a8fa155356bc9096 Mon Sep 17 00:00:00 2001 From: Jean Perier Date: Mon, 19 May 2025 01:37:14 -0700 Subject: [PATCH 3/4] use map_to_vector and FailureOr --- .../Optimizer/CodeGen/LLVMInsertChainFolder.h | 7 ++- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 7 +-- .../CodeGen/LLVMInsertChainFolder.cpp | 54 ++++++++++--------- 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h b/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h index d577c4c0fa70b..321bda91aa6fe 100644 --- a/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h +++ b/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h @@ -12,6 +12,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/LogicalResult.h" + namespace mlir { class Attribute; class OpBuilder; @@ -26,6 +28,7 @@ namespace fir { /// or cannot be represented as an Attribute. The operations are not deleted, /// but some llvm.insertvalue value operands may be folded with the builder on /// the way. -mlir::Attribute tryFoldingLLVMInsertChain(mlir::Value insertChainResult, - mlir::OpBuilder &builder); +llvm::FailureOr +tryFoldingLLVMInsertChain(mlir::Value insertChainResult, + mlir::OpBuilder &builder); } // namespace fir diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index ed76a77ced047..70c90fae34086 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -2428,9 +2428,10 @@ struct InsertOnRangeOpConversion // fold the inserted element value to an attribute and build an ArrayAttr // for the resulting array. if (range.isFullRange()) { - if (mlir::Attribute cst = - fir::tryFoldingLLVMInsertChain(adaptor.getVal(), rewriter)) { - mlir::Attribute dimVal = cst; + llvm::FailureOr cst = + fir::tryFoldingLLVMInsertChain(adaptor.getVal(), rewriter); + if (llvm::succeeded(cst)) { + mlir::Attribute dimVal = *cst; for (auto dim : llvm::reverse(dims)) { // Use std::vector in case the number of elements is big. std::vector elements(dim, dimVal); diff --git a/flang/lib/Optimizer/CodeGen/LLVMInsertChainFolder.cpp b/flang/lib/Optimizer/CodeGen/LLVMInsertChainFolder.cpp index 0fc8697b735cf..5b522f2647916 100644 --- a/flang/lib/Optimizer/CodeGen/LLVMInsertChainFolder.cpp +++ b/flang/lib/Optimizer/CodeGen/LLVMInsertChainFolder.cpp @@ -67,7 +67,7 @@ class InsertChainBackwardFolder { if (auto structTy = llvm::dyn_cast_if_present(type)) return structTy.getBody()[field]; - return {}; + return nullptr; } // Current element value of the aggregate value being built. @@ -83,12 +83,18 @@ class InsertChainBackwardFolder { // Helper to fold the value being inserted by an llvm.insert_value. // This may call tryFoldingLLVMInsertChain if the value is an aggregate and // was itself constructed by a different insert chain. +// Returns a nullptr Attribute if the value could not be folded. static mlir::Attribute getAttrIfConstant(mlir::Value val, mlir::OpBuilder &rewriter) { if (auto cst = val.getDefiningOp()) return cst.getValue(); - if (auto insert = val.getDefiningOp()) - return fir::tryFoldingLLVMInsertChain(val, rewriter); + if (auto insert = val.getDefiningOp()) { + llvm::FailureOr attr = + fir::tryFoldingLLVMInsertChain(val, rewriter); + if (succeeded(attr)) + return *attr; + return nullptr; + } if (val.getDefiningOp()) return mlir::LLVM::ZeroAttr::get(val.getContext()); if (val.getDefiningOp()) @@ -108,23 +114,20 @@ static mlir::Attribute getAttrIfConstant(mlir::Value val, return mlir::IntegerAttr::get(trunc.getType(), intAttr.getInt()); LLVM_DEBUG(llvm::dbgs() << "cannot fold insert value operand: " << val << "\n"); - return {}; + return nullptr; } mlir::Attribute InsertChainBackwardFolder::finalize(mlir::Attribute defaultFieldValue) { - std::vector attrs; - attrs.reserve(values.size()); - for (InFlightValue &inFlight : values) { - if (!inFlight) { - attrs.push_back(defaultFieldValue); - } else if (auto attr = llvm::dyn_cast(inFlight)) { - attrs.push_back(attr); - } else { - auto *inFlightList = llvm::cast(inFlight); - attrs.push_back(inFlightList->finalize(defaultFieldValue)); - } - } + llvm::SmallVector attrs = llvm::map_to_vector( + values, [&](InFlightValue inFlight) -> mlir::Attribute { + if (!inFlight) + return defaultFieldValue; + if (auto attr = llvm::dyn_cast(inFlight)) + return attr; + return llvm::cast(inFlight)->finalize( + defaultFieldValue); + }); return mlir::ArrayAttr::get(type.getContext(), attrs); } @@ -140,8 +143,11 @@ bool InsertChainBackwardFolder::pushValue(mlir::Attribute val, } // This is the first insert to a nested field. Create a // InsertChainBackwardFolder for the current element value. - InsertChainBackwardFolder &inFlightList = folderStorage->emplace_back( - getSubElementType(type, at[0]), folderStorage); + mlir::Type subType = getSubElementType(type, at[0]); + if (!subType) + return false; + InsertChainBackwardFolder &inFlightList = + folderStorage->emplace_back(subType, folderStorage); inFlight = &inFlightList; return inFlightList.pushValue(val, at.drop_front()); } @@ -162,8 +168,8 @@ bool InsertChainBackwardFolder::pushValue(mlir::Attribute val, return inFlightList->pushValue(val, at.drop_front()); } -mlir::Attribute fir::tryFoldingLLVMInsertChain(mlir::Value val, - mlir::OpBuilder &rewriter) { +llvm::FailureOr +fir::tryFoldingLLVMInsertChain(mlir::Value val, mlir::OpBuilder &rewriter) { if (auto cst = val.getDefiningOp()) return cst.getValue(); if (auto insert = val.getDefiningOp()) { @@ -178,9 +184,9 @@ mlir::Attribute fir::tryFoldingLLVMInsertChain(mlir::Value val, mlir::Attribute attr = getAttrIfConstant(currentInsert.getValue(), rewriter); if (!attr) - return {}; + return llvm::failure(); if (!inFlightList.pushValue(attr, currentInsert.getPosition())) - return {}; + return llvm::failure(); lastInsert = currentInsert; currentInsert = currentInsert.getContainer() .getDefiningOp(); @@ -195,10 +201,10 @@ mlir::Attribute fir::tryFoldingLLVMInsertChain(mlir::Value val, if (!defaultVal) { LLVM_DEBUG(llvm::dbgs() << "insert chain initial value is not Zero or Undef\n"); - return {}; + return llvm::failure(); } return inFlightList.finalize(defaultVal); } } - return {}; + return llvm::failure(); } From 181985e213289b1f0caa4d81599e07fbec067fb5 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Tue, 20 May 2025 10:47:35 +0200 Subject: [PATCH 4/4] Apply suggestions from code review Thanks Slava! Co-authored-by: Slava Zakharin --- flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h b/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h index 321bda91aa6fe..36360dc77d588 100644 --- a/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h +++ b/flang/include/flang/Optimizer/CodeGen/LLVMInsertChainFolder.h @@ -23,8 +23,8 @@ class Value; namespace fir { /// Attempt to fold an llvm.insertvalue chain into an attribute representation -/// suitable as llvm.constant operand. The returned value will be a null pointer -/// if this is not an llvm.insertvalue result pr if the chain is not a constant, +/// suitable as llvm.constant operand. The returned value will be llvm::Failure +/// if this is not an llvm.insertvalue result or if the chain is not a constant, /// or cannot be represented as an Attribute. The operations are not deleted, /// but some llvm.insertvalue value operands may be folded with the builder on /// the way.