Skip to content

Commit 38926f5

Browse files
committed
Never load/store fir.record and character type objects
So far, there were places where lowering was loading/stroring fir.record type to copy them. This works, but hits LLVM limitations when the derived type storage size is big [1]. LLVM advise against loading/storing big aggregates types here: https://llvm.org/docs/Frontend/PerformanceTips.html#avoid-loads-and-stores-of-large-aggregate-type but falls short of defining what is big. Simply always copy derived types component by components when a load/store was previously used. Note that it was considered keeping load/stores in fir, and transforming this to element by element copies in codegen, but it is not possible to ensure the load/store will still be pairs after fir transformation passes. Hence, generalize the use of genRecordAssignment helper, and add a new fir::factory::genScalarAssignement to centralize scalar assignment as much as possible. Also prevents the load/store of constant size fir.characters in places where it was happening (array value copy) since these too are aggregate types that can grow big. [1] llvm mailing list question: https://lists.llvm.org/pipermail/llvm-dev/2021-November/153844.html
1 parent 483737c commit 38926f5

File tree

9 files changed

+275
-97
lines changed

9 files changed

+275
-97
lines changed

flang/include/flang/Optimizer/Builder/FIRBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,11 @@ fir::ExtendedValue arraySectionElementToExtendedValue(
466466
fir::FirOpBuilder &builder, mlir::Location loc,
467467
const fir::ExtendedValue &array, mlir::Value element, mlir::Value slice);
468468

469+
/// Assign \p rhs to \p lhs. Both \p rhs and \p lhs must be scalars. The
470+
/// assignment follows Fortran intrinsic assignment semantic (10.2.1.3).
471+
void genScalarAssignment(fir::FirOpBuilder &builder, mlir::Location loc,
472+
const fir::ExtendedValue &lhs,
473+
const fir::ExtendedValue &rhs);
469474
/// Assign \p rhs to \p lhs. Both \p rhs and \p lhs must be scalar derived
470475
/// types. The assignment follows Fortran intrinsic assignment semantic for
471476
/// derived types (10.2.1.3 point 13).

flang/lib/Lower/ConvertExpr.cpp

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,9 @@ class ScalarExprLowering {
493493
return addr.match(
494494
[](const fir::CharBoxValue &box) -> ExtValue { return box; },
495495
[&](const fir::UnboxedValue &v) -> ExtValue {
496+
if (fir::unwrapRefType(fir::getBase(v).getType())
497+
.isa<fir::RecordType>())
498+
return v;
496499
return builder.create<fir::LoadOp>(loc, fir::getBase(v));
497500
},
498501
[&](const auto &v) -> ExtValue {
@@ -683,18 +686,12 @@ class ScalarExprLowering {
683686
auto to = fir::factory::componentToExtendedValue(builder, loc, coor);
684687
to.match(
685688
[&](const fir::UnboxedValue &toPtr) {
686-
// FIXME: if toPtr is a derived type, it is incorrect after F95 to
687-
// simply load/store derived type since they may have allocatable
688-
// components that require deep-copy or may have defined assignment
689-
// procedures.
690-
auto val = fir::getBase(genval(expr.value()));
691-
auto cast = builder.createConvert(
692-
loc, fir::dyn_cast_ptrEleTy(toPtr.getType()), val);
693-
builder.create<fir::StoreOp>(loc, cast, toPtr);
689+
ExtValue value = genval(expr.value());
690+
fir::factory::genScalarAssignment(builder, loc, to, value);
694691
},
695692
[&](const fir::CharBoxValue &) {
696-
fir::factory::CharacterExprHelper{builder, loc}.createAssign(
697-
to, genval(expr.value()));
693+
ExtValue value = genval(expr.value());
694+
fir::factory::genScalarAssignment(builder, loc, to, value);
698695
},
699696
[&](const fir::ArrayBoxValue &) {
700697
Fortran::lower::createSomeArrayAssignment(
@@ -722,7 +719,7 @@ class ScalarExprLowering {
722719
TODO(loc, "procedure pointer component in derived type assignment");
723720
});
724721
}
725-
return builder.create<fir::LoadOp>(loc, res);
722+
return res;
726723
}
727724

728725
/// Lowering of an <i>ac-do-variable</i>, which is not a Symbol.
@@ -5260,8 +5257,7 @@ class ArrayExprLowering {
52605257
builder.createConvert(loc, fir::HeapType::get(resTy), mem);
52615258
auto buffi = builder.create<fir::CoordinateOp>(loc, eleRefTy, buff,
52625259
mlir::ValueRange{off});
5263-
auto val = builder.createConvert(loc, eleTy, v);
5264-
builder.create<fir::StoreOp>(loc, val, buffi);
5260+
fir::factory::genScalarAssignment(builder, loc, buffi, v);
52655261
builder.create<fir::StoreOp>(loc, plusOne, buffPos);
52665262
},
52675263
[&](const fir::CharBoxValue &v) {

flang/lib/Optimizer/Builder/FIRBuilder.cpp

Lines changed: 99 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -823,8 +823,99 @@ fir::ExtendedValue fir::factory::arraySectionElementToExtendedValue(
823823
return fir::factory::componentToExtendedValue(builder, loc, element);
824824
}
825825

826+
void fir::factory::genScalarAssignment(fir::FirOpBuilder &builder,
827+
mlir::Location loc,
828+
const fir::ExtendedValue &lhs,
829+
const fir::ExtendedValue &rhs) {
830+
assert(lhs.rank() == 0 && rhs.rank() == 0 && "must be scalars");
831+
auto type = fir::unwrapSequenceType(
832+
fir::unwrapPassByRefType(fir::getBase(lhs).getType()));
833+
if (type.isa<fir::CharacterType>()) {
834+
const fir::CharBoxValue *toChar = lhs.getCharBox();
835+
const fir::CharBoxValue *fromChar = rhs.getCharBox();
836+
assert(toChar && fromChar);
837+
fir::factory::CharacterExprHelper helper{builder, loc};
838+
helper.createAssign(fir::ExtendedValue{*toChar},
839+
fir::ExtendedValue{*fromChar});
840+
} else if (type.isa<fir::RecordType>()) {
841+
fir::factory::genRecordAssignment(builder, loc, lhs, rhs);
842+
} else {
843+
assert(!fir::hasDynamicSize(type));
844+
auto rhsVal = fir::getBase(rhs);
845+
if (fir::isa_ref_type(rhsVal.getType()))
846+
rhsVal = builder.create<fir::LoadOp>(loc, rhsVal);
847+
mlir::Value lhsAddr = fir::getBase(lhs);
848+
rhsVal = builder.createConvert(loc, fir::unwrapRefType(lhsAddr.getType()),
849+
rhsVal);
850+
builder.create<fir::StoreOp>(loc, rhsVal, lhsAddr);
851+
}
852+
}
853+
854+
static void genComponentByComponentAssignment(fir::FirOpBuilder &builder,
855+
mlir::Location loc,
856+
const fir::ExtendedValue &lhs,
857+
const fir::ExtendedValue &rhs) {
858+
auto baseType = fir::unwrapPassByRefType(fir::getBase(lhs).getType());
859+
auto lhsType = baseType.dyn_cast<fir::RecordType>();
860+
assert(lhsType && "lhs must be a scalar record type");
861+
auto fieldIndexType = fir::FieldType::get(lhsType.getContext());
862+
for (auto [fieldName, fieldType] : lhsType.getTypeList()) {
863+
assert(!fir::hasDynamicSize(fieldType));
864+
mlir::Value field = builder.create<fir::FieldIndexOp>(
865+
loc, fieldIndexType, fieldName, lhsType, fir::getTypeParams(lhs));
866+
auto fieldRefType = builder.getRefType(fieldType);
867+
mlir::Value fromCoor = builder.create<fir::CoordinateOp>(
868+
loc, fieldRefType, fir::getBase(rhs), field);
869+
mlir::Value toCoor = builder.create<fir::CoordinateOp>(
870+
loc, fieldRefType, fir::getBase(lhs), field);
871+
llvm::Optional<fir::DoLoopOp> outerLoop;
872+
if (auto sequenceType = fieldType.dyn_cast<fir::SequenceType>()) {
873+
// Create loops to assign array components elements by elements.
874+
// Note that, since these are components, they either do not overlap,
875+
// or are the same and exactly overlap. They also have compile time
876+
// constant shapes.
877+
mlir::Type idxTy = builder.getIndexType();
878+
llvm::SmallVector<mlir::Value> indices;
879+
mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
880+
mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
881+
for (auto extent : llvm::reverse(sequenceType.getShape())) {
882+
// TODO: add zero size test !
883+
mlir::Value ub = builder.createIntegerConstant(loc, idxTy, extent - 1);
884+
auto loop = builder.create<fir::DoLoopOp>(loc, zero, ub, one);
885+
if (!outerLoop)
886+
outerLoop = loop;
887+
indices.push_back(loop.getInductionVar());
888+
builder.setInsertionPointToStart(loop.getBody());
889+
}
890+
// Set indices in column-major order.
891+
std::reverse(indices.begin(), indices.end());
892+
auto elementRefType = builder.getRefType(sequenceType.getEleTy());
893+
toCoor = builder.create<fir::CoordinateOp>(loc, elementRefType, toCoor,
894+
indices);
895+
fromCoor = builder.create<fir::CoordinateOp>(loc, elementRefType,
896+
fromCoor, indices);
897+
}
898+
auto fieldElementType = fir::unwrapSequenceType(fieldType);
899+
if (fieldElementType.isa<fir::BoxType>()) {
900+
assert(fieldElementType.cast<fir::BoxType>()
901+
.getEleTy()
902+
.isa<fir::PointerType>() &&
903+
"allocatable require deep copy");
904+
auto fromPointerValue = builder.create<fir::LoadOp>(loc, fromCoor);
905+
builder.create<fir::StoreOp>(loc, fromPointerValue, toCoor);
906+
} else {
907+
auto from =
908+
fir::factory::componentToExtendedValue(builder, loc, fromCoor);
909+
auto to = fir::factory::componentToExtendedValue(builder, loc, toCoor);
910+
fir::factory::genScalarAssignment(builder, loc, to, from);
911+
}
912+
if (outerLoop)
913+
builder.setInsertionPointAfter(*outerLoop);
914+
}
915+
}
916+
826917
/// Can the assignment of this record type be implement with a simple memory
827-
/// copy ?
918+
/// copy (it requires no deep copy or user defined assignment of components )?
828919
static bool recordTypeCanBeMemCopied(fir::RecordType recordType) {
829920
if (fir::hasDynamicSize(recordType))
830921
return false;
@@ -872,10 +963,13 @@ void fir::factory::genRecordAssignment(fir::FirOpBuilder &builder,
872963
}
873964
// Otherwise, the derived type has compile time constant size and for which
874965
// the component by component assignment can be replaced by a memory copy.
875-
auto rhsVal = fir::getBase(rhs);
876-
if (fir::isa_ref_type(rhsVal.getType()))
877-
rhsVal = builder.create<fir::LoadOp>(loc, rhsVal);
878-
builder.create<fir::StoreOp>(loc, rhsVal, fir::getBase(lhs));
966+
// Since we do not know the size of the derived type in lowering, do a
967+
// component by component assignment. Note that a single fir.load/fir.store
968+
// could be used on "small" record types, but as the type size grows, this
969+
// leads to issues in LLVM (long compile times, long IR files, and even
970+
// asserts at some point). Since there is no good size boundary, just always
971+
// use component by component assignment here.
972+
genComponentByComponentAssignment(builder, loc, lhs, rhs);
879973
}
880974

881975
mlir::TupleType

flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -803,24 +803,22 @@ void genArrayCopy(mlir::Location loc, mlir::PatternRewriter &rewriter,
803803
factory::originateIndices(loc, rewriter, dst.getType(), shapeOp, indices),
804804
typeparams);
805805
auto eleTy = unwrapSequenceType(unwrapPassByRefType(dst.getType()));
806-
if (hasDynamicSize(eleTy)) {
807-
if (auto charTy = eleTy.dyn_cast<CharacterType>()) {
808-
assert(charTy.hasDynamicLen() && "dynamic size and constant length");
809-
// Copy from (to) object to (from) temp copy of same object.
810-
auto len = typeparams.back();
811-
CharBoxValue toChar(toAddr, len);
812-
CharBoxValue fromChar(fromAddr, len);
813-
auto module = toAddr->getParentOfType<mlir::ModuleOp>();
814-
FirOpBuilder builder(rewriter, getKindMapping(module));
815-
factory::CharacterExprHelper helper{builder, loc};
816-
helper.createAssign(ExtendedValue{toChar}, ExtendedValue{fromChar});
817-
} else {
818-
TODO(loc, "copy element of dynamic size");
819-
}
806+
auto module = toAddr->getParentOfType<mlir::ModuleOp>();
807+
FirOpBuilder builder(rewriter, getKindMapping(module));
808+
// Copy from (to) object to (from) temp copy of same object.
809+
if (auto charTy = eleTy.dyn_cast<CharacterType>()) {
810+
auto len =
811+
charTy.hasDynamicLen()
812+
? typeparams.back()
813+
: builder.createIntegerConstant(
814+
loc, builder.getCharacterLengthType(), charTy.getLen());
815+
CharBoxValue toChar(toAddr, len);
816+
CharBoxValue fromChar(fromAddr, len);
817+
fir::factory::genScalarAssignment(builder, loc, toChar, fromChar);
820818
} else {
821-
// TODO: Should this check if the size of the element is "too big"?
822-
auto load = rewriter.create<fir::LoadOp>(loc, fromAddr);
823-
rewriter.create<fir::StoreOp>(loc, load, toAddr);
819+
if (hasDynamicSize(eleTy))
820+
TODO(loc, "copy element of dynamic size");
821+
fir::factory::genScalarAssignment(builder, loc, toAddr, fromAddr);
824822
}
825823
rewriter.restoreInsertionPoint(insPt);
826824
}

flang/test/Fir/array-value-copy.f90

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,11 @@
3434
// CHECK: %[[VAL_18:.*]] = arith.constant 1 : index
3535
// CHECK: %[[VAL_19:.*]] = arith.addi %[[VAL_14]], %[[VAL_18]] : index
3636
// CHECK: %[[VAL_20:.*]] = fir.array_coor %[[VAL_9]](%[[VAL_6]]) %[[VAL_19]] : (!fir.heap<!fir.array<100x!fir.type<t{i:i32}>>>, !fir.shape<1>, index) -> !fir.ref<!fir.type<t{i:i32}>>
37-
// CHECK: %[[VAL_21:.*]] = fir.load %[[VAL_17]] : !fir.ref<!fir.type<t{i:i32}>>
38-
// CHECK: fir.store %[[VAL_21]] to %[[VAL_20]] : !fir.ref<!fir.type<t{i:i32}>>
37+
// CHECK: %[[VAL_21:.*]] = fir.field_index i, !fir.type<t{i:i32}>
38+
// CHECK: %[[VAL_22:.*]] = fir.coordinate_of %[[VAL_17]], %[[VAL_21]] : (!fir.ref<!fir.type<t{i:i32}>>, !fir.field) -> !fir.ref<i32>
39+
// CHECK: %[[VAL_23:.*]] = fir.coordinate_of %[[VAL_20]], %[[VAL_21]] : (!fir.ref<!fir.type<t{i:i32}>>, !fir.field) -> !fir.ref<i32>
40+
// CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_22]] : !fir.ref<i32>
41+
// CHECK: fir.store %[[VAL_24]] to %[[VAL_23]] : !fir.ref<i32>
3942
// CHECK: }
4043

4144
// Actual assignment and copy-out

flang/test/Lower/allocatable-assignment.f90

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -244,14 +244,17 @@ subroutine test_derived_scalar(x, s)
244244
! CHECK: %[[VAL_13:.*]] = fir.allocmem !fir.type<_QMalloc_assignTt{i:i32}> {uniq_name = ".auto.alloc"}
245245
! CHECK: fir.result %[[VAL_12]], %[[VAL_13]] : i1, !fir.heap<!fir.type<_QMalloc_assignTt{i:i32}>>
246246
! CHECK: }
247-
! CHECK: %[[VAL_14:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.type<_QMalloc_assignTt{i:i32}>>
248-
! CHECK: fir.store %[[VAL_14]] to %[[VAL_7]]#1 : !fir.heap<!fir.type<_QMalloc_assignTt{i:i32}>>
247+
! CHECK: %[[VAL_14:.*]] = fir.field_index i, !fir.type<_QMalloc_assignTt{i:i32}>
248+
! CHECK: %[[VAL_15:.*]] = fir.coordinate_of %[[VAL_1]], %[[VAL_14]] : (!fir.ref<!fir.type<_QMalloc_assignTt{i:i32}>>, !fir.field) -> !fir.ref<i32>
249+
! CHECK: %[[VAL_16:.*]] = fir.coordinate_of %[[VAL_7]]#1, %[[VAL_14]] : (!fir.heap<!fir.type<_QMalloc_assignTt{i:i32}>>, !fir.field) -> !fir.ref<i32>
250+
! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_15]] : !fir.ref<i32>
251+
! CHECK: fir.store %[[VAL_17]] to %[[VAL_16]] : !fir.ref<i32
249252
! CHECK: fir.if %[[VAL_7]]#0 {
250253
! CHECK: fir.if %[[VAL_6]] {
251254
! CHECK: fir.freemem %[[VAL_3]] : !fir.heap<!fir.type<_QMalloc_assignTt{i:i32}>>
252255
! CHECK: }
253-
! CHECK: %[[VAL_16:.*]] = fir.embox %[[VAL_7]]#1 : (!fir.heap<!fir.type<_QMalloc_assignTt{i:i32}>>) -> !fir.box<!fir.heap<!fir.type<_QMalloc_assignTt{i:i32}>>>
254-
! CHECK: fir.store %[[VAL_16]] to %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMalloc_assignTt{i:i32}>>>>
256+
! CHECK: %[[VAL_19:.*]] = fir.embox %[[VAL_7]]#1 : (!fir.heap<!fir.type<_QMalloc_assignTt{i:i32}>>) -> !fir.box<!fir.heap<!fir.type<_QMalloc_assignTt{i:i32}>>>
257+
! CHECK: fir.store %[[VAL_19]] to %[[VAL_0]] : !fir.ref<!fir.box<!fir.heap<!fir.type<_QMalloc_assignTt{i:i32}>>>>
255258
! CHECK: }
256259
end subroutine
257260

0 commit comments

Comments
 (0)