Skip to content

Commit 2ad7174

Browse files
[LSR] Insert the transformed IV increment in the user block (llvm#169515)
Currently we try to hoist the transformed IV increment instruction to the loop header block to help with generation of postincrement instructions, but this only works if the user instruction is also in the header. Instead, we should insert the increment in the same block as the user instruction.
1 parent 9063416 commit 2ad7174

File tree

3 files changed

+29
-35
lines changed

3 files changed

+29
-35
lines changed

llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6024,33 +6024,34 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
60246024
DeadInsts.emplace_back(OperandIsInstr);
60256025
}
60266026

6027-
// Trying to hoist the IVInc to loop header if all IVInc users are in
6028-
// the loop header. It will help backend to generate post index load/store
6029-
// when the latch block is different from loop header block.
6030-
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
6031-
const LSRUse &LU, Instruction *IVIncInsertPos,
6032-
Loop *L) {
6027+
// Determine where to insert the transformed IV increment instruction for this
6028+
// fixup. By default this is the default insert position, but if this is a
6029+
// postincrement opportunity then we try to insert it in the same block as the
6030+
// fixup user instruction, as this is needed for a postincrement instruction to
6031+
// be generated.
6032+
static Instruction *getFixupInsertPos(const TargetTransformInfo &TTI,
6033+
const LSRFixup &Fixup, const LSRUse &LU,
6034+
Instruction *IVIncInsertPos,
6035+
DominatorTree &DT) {
6036+
// Only address uses can be postincremented
60336037
if (LU.Kind != LSRUse::Address)
6034-
return false;
6035-
6036-
// For now this code do the conservative optimization, only work for
6037-
// the header block. Later we can hoist the IVInc to the block post
6038-
// dominate all users.
6039-
BasicBlock *LHeader = L->getHeader();
6040-
if (IVIncInsertPos->getParent() == LHeader)
6041-
return false;
6042-
6043-
if (!Fixup.OperandValToReplace ||
6044-
any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) {
6045-
Instruction *UI = cast<Instruction>(U);
6046-
return UI->getParent() != LHeader;
6047-
}))
6048-
return false;
6038+
return IVIncInsertPos;
60496039

6040+
// Don't try to postincrement if it's not legal
60506041
Instruction *I = Fixup.UserInst;
60516042
Type *Ty = I->getType();
6052-
return (isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
6053-
(isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty));
6043+
if (!(isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) &&
6044+
!(isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)))
6045+
return IVIncInsertPos;
6046+
6047+
// It's only legal to hoist to the user block if it dominates the default
6048+
// insert position.
6049+
BasicBlock *HoistBlock = I->getParent();
6050+
BasicBlock *IVIncBlock = IVIncInsertPos->getParent();
6051+
if (!DT.dominates(I, IVIncBlock))
6052+
return IVIncInsertPos;
6053+
6054+
return HoistBlock->getTerminator();
60546055
}
60556056

60566057
/// Rewrite all the fixup locations with new values, following the chosen
@@ -6071,9 +6072,7 @@ void LSRInstance::ImplementSolution(
60716072
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
60726073
for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
60736074
Instruction *InsertPos =
6074-
canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)
6075-
? L->getHeader()->getTerminator()
6076-
: IVIncInsertPos;
6075+
getFixupInsertPos(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, DT);
60776076
Rewriter.setIVIncInsertPos(L, InsertPos);
60786077
Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
60796078
Changed = true;

llvm/test/CodeGen/Thumb2/mve-blockplacement.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,8 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) {
6666
; CHECK-NEXT: cset r6, ne
6767
; CHECK-NEXT: strb r6, [r5]
6868
; CHECK-NEXT: add.w r2, r2, #792
69-
; CHECK-NEXT: ldrb r6, [r3]
69+
; CHECK-NEXT: ldrb r6, [r3], #2
7070
; CHECK-NEXT: adds r4, #8
71-
; CHECK-NEXT: adds r3, #2
7271
; CHECK-NEXT: cmp r6, #0
7372
; CHECK-NEXT: ite ne
7473
; CHECK-NEXT: sxthne r6, r1
@@ -101,8 +100,7 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) {
101100
; CHECK-NEXT: cset r6, ne
102101
; CHECK-NEXT: adds r4, #8
103102
; CHECK-NEXT: strb r6, [r5]
104-
; CHECK-NEXT: ldrb r6, [r3]
105-
; CHECK-NEXT: adds r3, #2
103+
; CHECK-NEXT: ldrb r6, [r3], #2
106104
; CHECK-NEXT: cmp r6, #0
107105
; CHECK-NEXT: ite ne
108106
; CHECK-NEXT: sxthne r6, r1
@@ -134,8 +132,7 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) {
134132
; CHECK-NEXT: cset r4, ne
135133
; CHECK-NEXT: add.w r11, r11, #8
136134
; CHECK-NEXT: strb r4, [r5]
137-
; CHECK-NEXT: ldrb r4, [r3]
138-
; CHECK-NEXT: adds r3, #2
135+
; CHECK-NEXT: ldrb r4, [r3], #2
139136
; CHECK-NEXT: cmp r4, #0
140137
; CHECK-NEXT: ite ne
141138
; CHECK-NEXT: sxthne r4, r1

llvm/test/Transforms/LoopStrengthReduce/AArch64/prefer-all.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -230,8 +230,6 @@ exit:
230230

231231
; The control-flow before and after the load of qval shouldn't prevent postindex
232232
; addressing from happening.
233-
; FIXME: We choose postindex addressing, but the scevgep is placed in for.inc so
234-
; during codegen we will fail to actually generate a postindex load.
235233
define void @middle_block_load(ptr %p, ptr %q, i64 %n) {
236234
; CHECK-LABEL: define void @middle_block_load(
237235
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i64 [[N:%.*]]) {
@@ -254,6 +252,7 @@ define void @middle_block_load(ptr %p, ptr %q, i64 %n) {
254252
; CHECK: [[IF_END]]:
255253
; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
256254
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[QVAL]], 0
255+
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
257256
; CHECK-NEXT: br i1 [[CMP2]], label %[[IF_THEN2:.*]], label %[[IF_ELSE2:.*]]
258257
; CHECK: [[IF_THEN2]]:
259258
; CHECK-NEXT: tail call void @otherfn1()
@@ -263,7 +262,6 @@ define void @middle_block_load(ptr %p, ptr %q, i64 %n) {
263262
; CHECK-NEXT: br label %[[FOR_INC]]
264263
; CHECK: [[FOR_INC]]:
265264
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
266-
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
267265
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
268266
; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT:.*]], label %[[FOR_BODY]]
269267
; CHECK: [[EXIT]]:

0 commit comments

Comments
 (0)