Skip to content

Commit 58bc994

Browse files
[fir] Always use memcpy for fir.load/store
LLVM is not as effective at optimizing aggregate loads and stores as it is at optimizing memcpy calls, so we always generate the latter for loads and stores between fir.box objects.
1 parent 8c2e8b5 commit 58bc994

File tree

1 file changed

+19
-39
lines changed

1 file changed

+19
-39
lines changed

flang/lib/Optimizer/CodeGen/CodeGen.cpp

Lines changed: 19 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2951,7 +2951,7 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
29512951
mlir::ConversionPatternRewriter &rewriter) const override {
29522952
mlir::Type llvmLoadTy = convertObjectType(load.getType());
29532953
if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(load.getType())) {
2954-
// fir.box is a special case because it is considered as an ssa values in
2954+
// fir.box is a special case because it is considered an ssa value in
29552955
// fir, but it is lowered as a pointer to a descriptor. So
29562956
// fir.ref<fir.box> and fir.box end up being the same llvm types and
29572957
// loading a fir.ref<fir.box> is implemented as taking a snapshot of the
@@ -2960,30 +2960,17 @@ struct LoadOpConversion : public fir::FIROpConversion<fir::LoadOp> {
29602960
mlir::Location loc = load.getLoc();
29612961
auto newBoxStorage =
29622962
genAllocaAndAddrCastWithType(loc, llvmLoadTy, defaultAlign, rewriter);
2963-
// TODO: always generate llvm.memcpy, LLVM is better at optimizing it than
2964-
// aggregate loads + stores.
2965-
if (boxTy.isAssumedRank()) {
2966-
2967-
TypePair boxTypePair{boxTy, llvmLoadTy};
2968-
mlir::Value boxSize =
2969-
computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter);
2970-
auto memcpy = rewriter.create<mlir::LLVM::MemcpyOp>(
2971-
loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false);
2972-
if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
2973-
memcpy.setTBAATags(*optionalTag);
2974-
else
2975-
attachTBAATag(memcpy, boxTy, boxTy, nullptr);
2976-
} else {
2977-
auto boxValue = rewriter.create<mlir::LLVM::LoadOp>(loc, llvmLoadTy,
2978-
inputBoxStorage);
2979-
if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
2980-
boxValue.setTBAATags(*optionalTag);
2981-
else
2982-
attachTBAATag(boxValue, boxTy, boxTy, nullptr);
2983-
auto storeOp =
2984-
rewriter.create<mlir::LLVM::StoreOp>(loc, boxValue, newBoxStorage);
2985-
attachTBAATag(storeOp, boxTy, boxTy, nullptr);
2986-
}
2963+
2964+
TypePair boxTypePair{boxTy, llvmLoadTy};
2965+
mlir::Value boxSize =
2966+
computeBoxSize(loc, boxTypePair, inputBoxStorage, rewriter);
2967+
auto memcpy = rewriter.create<mlir::LLVM::MemcpyOp>(
2968+
loc, newBoxStorage, inputBoxStorage, boxSize, /*isVolatile=*/false);
2969+
2970+
if (std::optional<mlir::ArrayAttr> optionalTag = load.getTbaa())
2971+
memcpy.setTBAATags(*optionalTag);
2972+
else
2973+
attachTBAATag(memcpy, boxTy, boxTy, nullptr);
29872974
rewriter.replaceOp(load, newBoxStorage);
29882975
} else {
29892976
auto loadOp = rewriter.create<mlir::LLVM::LoadOp>(
@@ -3227,20 +3214,13 @@ struct StoreOpConversion : public fir::FIROpConversion<fir::StoreOp> {
32273214
mlir::LLVM::AliasAnalysisOpInterface newOp;
32283215
if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(storeTy)) {
32293216
mlir::Type llvmBoxTy = lowerTy().convertBoxTypeAsStruct(boxTy);
3230-
// fir.box value is actually in memory, load it first before storing it,
3231-
// or do a memcopy for assumed-rank descriptors.
3232-
if (boxTy.isAssumedRank()) {
3233-
TypePair boxTypePair{boxTy, llvmBoxTy};
3234-
mlir::Value boxSize =
3235-
computeBoxSize(loc, boxTypePair, llvmValue, rewriter);
3236-
newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
3237-
loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false);
3238-
} else {
3239-
auto val =
3240-
rewriter.create<mlir::LLVM::LoadOp>(loc, llvmBoxTy, llvmValue);
3241-
attachTBAATag(val, boxTy, boxTy, nullptr);
3242-
newOp = rewriter.create<mlir::LLVM::StoreOp>(loc, val, llvmMemref);
3243-
}
3217+
// Always use memcpy because LLVM is not as effective at optimizing
3218+
// aggregate loads/stores as it is optimizing memcpy.
3219+
TypePair boxTypePair{boxTy, llvmBoxTy};
3220+
mlir::Value boxSize =
3221+
computeBoxSize(loc, boxTypePair, llvmValue, rewriter);
3222+
newOp = rewriter.create<mlir::LLVM::MemcpyOp>(
3223+
loc, llvmMemref, llvmValue, boxSize, /*isVolatile=*/false);
32443224
} else {
32453225
newOp = rewriter.create<mlir::LLVM::StoreOp>(loc, llvmValue, llvmMemref);
32463226
}

0 commit comments

Comments
 (0)