Skip to content

Commit 7ee0e0f

Browse files
committed
Revert "[LICM] Sink unused l-invariant loads in preheader. #157559"
This reverts commit 469702c. #168048
1 parent 928393b commit 7ee0e0f

36 files changed

+326
-457
lines changed

llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -162,6 +162,8 @@ class IndVarSimplify {
162162
const SCEV *ExitCount,
163163
PHINode *IndVar, SCEVExpander &Rewriter);
164164

165+
bool sinkUnusedInvariants(Loop *L);
166+
165167
public:
166168
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
167169
const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -1091,6 +1093,85 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
10911093
return true;
10921094
}
10931095

1096+
//===----------------------------------------------------------------------===//
1097+
// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1098+
//===----------------------------------------------------------------------===//
1099+
1100+
/// If there's a single exit block, sink any loop-invariant values that
1101+
/// were defined in the preheader but not used inside the loop into the
1102+
/// exit block to reduce register pressure in the loop.
1103+
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1104+
BasicBlock *ExitBlock = L->getExitBlock();
1105+
if (!ExitBlock) return false;
1106+
1107+
BasicBlock *Preheader = L->getLoopPreheader();
1108+
if (!Preheader) return false;
1109+
1110+
bool MadeAnyChanges = false;
1111+
for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
1112+
1113+
// Skip BB Terminator.
1114+
if (Preheader->getTerminator() == &I)
1115+
continue;
1116+
1117+
// New instructions were inserted at the end of the preheader.
1118+
if (isa<PHINode>(I))
1119+
break;
1120+
1121+
// Don't move instructions which might have side effects, since the side
1122+
// effects need to complete before instructions inside the loop. Also don't
1123+
// move instructions which might read memory, since the loop may modify
1124+
// memory. Note that it's okay if the instruction might have undefined
1125+
// behavior: LoopSimplify guarantees that the preheader dominates the exit
1126+
// block.
1127+
if (I.mayHaveSideEffects() || I.mayReadFromMemory())
1128+
continue;
1129+
1130+
// Skip debug or pseudo instructions.
1131+
if (I.isDebugOrPseudoInst())
1132+
continue;
1133+
1134+
// Skip eh pad instructions.
1135+
if (I.isEHPad())
1136+
continue;
1137+
1138+
// Don't sink alloca: we never want to sink static alloca's out of the
1139+
// entry block, and correctly sinking dynamic alloca's requires
1140+
// checks for stacksave/stackrestore intrinsics.
1141+
// FIXME: Refactor this check somehow?
1142+
if (isa<AllocaInst>(&I))
1143+
continue;
1144+
1145+
// Determine if there is a use in or before the loop (direct or
1146+
// otherwise).
1147+
bool UsedInLoop = false;
1148+
for (Use &U : I.uses()) {
1149+
Instruction *User = cast<Instruction>(U.getUser());
1150+
BasicBlock *UseBB = User->getParent();
1151+
if (PHINode *P = dyn_cast<PHINode>(User)) {
1152+
unsigned i =
1153+
PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1154+
UseBB = P->getIncomingBlock(i);
1155+
}
1156+
if (UseBB == Preheader || L->contains(UseBB)) {
1157+
UsedInLoop = true;
1158+
break;
1159+
}
1160+
}
1161+
1162+
// If there is, the def must remain in the preheader.
1163+
if (UsedInLoop)
1164+
continue;
1165+
1166+
// Otherwise, sink it to the exit block.
1167+
I.moveBefore(ExitBlock->getFirstInsertionPt());
1168+
SE->forgetValue(&I);
1169+
MadeAnyChanges = true;
1170+
}
1171+
1172+
return MadeAnyChanges;
1173+
}
1174+
10941175
static void replaceExitCond(BranchInst *BI, Value *NewCond,
10951176
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
10961177
auto *OldCond = BI->getCondition();
@@ -1998,6 +2079,10 @@ bool IndVarSimplify::run(Loop *L) {
19982079

19992080
// The Rewriter may not be used from this point on.
20002081

2082+
// Loop-invariant instructions in the preheader that aren't used in the
2083+
// loop may be sunk below the loop to reduce register pressure.
2084+
Changed |= sinkUnusedInvariants(L);
2085+
20012086
// rewriteFirstIterationLoopExitValues does not rely on the computation of
20022087
// trip count and therefore can further simplify exit values in addition to
20032088
// rewriteLoopExitValues.

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 7 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -211,15 +211,9 @@ static Instruction *cloneInstructionInExitBlock(
211211
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
212212
MemorySSAUpdater &MSSAU);
213213

214-
static void moveInstructionBefore(
215-
Instruction &I, BasicBlock::iterator Dest, ICFLoopSafetyInfo &SafetyInfo,
216-
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
217-
MemorySSA::InsertionPlace Point = MemorySSA::BeforeTerminator);
218-
219-
static bool sinkUnusedInvariantsFromPreheaderToExit(
220-
Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
221-
MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
222-
SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE);
214+
static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
215+
ICFLoopSafetyInfo &SafetyInfo,
216+
MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
223217

224218
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
225219
function_ref<void(Instruction *)> Fn);
@@ -477,12 +471,6 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
477471
TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
478472
: sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
479473
MSSAU, &SafetyInfo, Flags, ORE);
480-
481-
// sink pre-header defs that are unused in-loop into the unique exit to reduce
482-
// pressure.
483-
Changed |= sinkUnusedInvariantsFromPreheaderToExit(L, AA, &SafetyInfo, MSSAU,
484-
SE, DT, Flags, ORE);
485-
486474
Flags.setIsSink(false);
487475
if (Preheader)
488476
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
@@ -1468,80 +1456,19 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
14681456

14691457
static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
14701458
ICFLoopSafetyInfo &SafetyInfo,
1471-
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1472-
MemorySSA::InsertionPlace Point) {
1459+
MemorySSAUpdater &MSSAU,
1460+
ScalarEvolution *SE) {
14731461
SafetyInfo.removeInstruction(&I);
14741462
SafetyInfo.insertInstructionTo(&I, Dest->getParent());
14751463
I.moveBefore(*Dest->getParent(), Dest);
14761464
if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
14771465
MSSAU.getMemorySSA()->getMemoryAccess(&I)))
1478-
MSSAU.moveToPlace(OldMemAcc, Dest->getParent(), Point);
1466+
MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
1467+
MemorySSA::BeforeTerminator);
14791468
if (SE)
14801469
SE->forgetBlockAndLoopDispositions(&I);
14811470
}
14821471

1483-
// If there's a single exit block, sink any loop-invariant values that were
1484-
// defined in the preheader but not used inside the loop into the exit block
1485-
// to reduce register pressure in the loop.
1486-
static bool sinkUnusedInvariantsFromPreheaderToExit(
1487-
Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
1488-
MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
1489-
SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE) {
1490-
BasicBlock *ExitBlock = L->getExitBlock();
1491-
if (!ExitBlock)
1492-
return false;
1493-
1494-
BasicBlock *Preheader = L->getLoopPreheader();
1495-
if (!Preheader)
1496-
return false;
1497-
1498-
bool MadeAnyChanges = false;
1499-
1500-
for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
1501-
1502-
// Skip terminator.
1503-
if (Preheader->getTerminator() == &I)
1504-
continue;
1505-
1506-
// New instructions were inserted at the end of the preheader.
1507-
if (isa<PHINode>(I))
1508-
break;
1509-
1510-
// Don't move instructions which might have side effects, since the side
1511-
// effects need to complete before instructions inside the loop. Note that
1512-
// it's okay if the instruction might have undefined behavior: LoopSimplify
1513-
// guarantees that the preheader dominates the exit block.
1514-
if (I.mayHaveSideEffects())
1515-
continue;
1516-
1517-
if (!canSinkOrHoistInst(I, AA, DT, L, MSSAU, true, SinkFlags, nullptr))
1518-
continue;
1519-
1520-
// Determine if there is a use in or before the loop (direct or
1521-
// otherwise).
1522-
bool UsedInLoopOrPreheader = false;
1523-
for (Use &U : I.uses()) {
1524-
auto *UserI = cast<Instruction>(U.getUser());
1525-
BasicBlock *UseBB = UserI->getParent();
1526-
if (auto *PN = dyn_cast<PHINode>(UserI)) {
1527-
UseBB = PN->getIncomingBlock(U);
1528-
}
1529-
if (UseBB == Preheader || L->contains(UseBB)) {
1530-
UsedInLoopOrPreheader = true;
1531-
break;
1532-
}
1533-
}
1534-
if (UsedInLoopOrPreheader)
1535-
continue;
1536-
1537-
moveInstructionBefore(I, ExitBlock->getFirstInsertionPt(), *SafetyInfo,
1538-
MSSAU, SE, MemorySSA::Beginning);
1539-
MadeAnyChanges = true;
1540-
}
1541-
1542-
return MadeAnyChanges;
1543-
}
1544-
15451472
static Instruction *sinkThroughTriviallyReplaceablePHI(
15461473
PHINode *TPN, Instruction *I, LoopInfo *LI,
15471474
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,

llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -73,10 +73,10 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %
7373
}
7474

7575
; CHECK-LABEL: {{^}}excess_soft_clause_reg_pressure:
76-
; GFX908: NumSgprs: 56
77-
; GFX908-GCNTRACKERS: NumSgprs: 56
76+
; GFX908: NumSgprs: 64
77+
; GFX908-GCNTRACKERS: NumSgprs: 64
7878
; GFX908: NumVgprs: 43
79-
; GFX908-GCNTRACKERS: NumVgprs: 40
79+
; GFX908-GCNTRACKERS: NumVgprs: 39
8080
; GFX908: Occupancy: 5
8181
; GFX908-GCNTRACKERS: Occupancy: 6
8282

llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@
2222
; GFX9-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe00000
2323

2424
; OFFREG is offset system SGPR
25+
; GCN: buffer_store_dword {{v[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
26+
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Reload
2527
; GCN: NumVgprs: 256
2628
; GCN: ScratchSize: 640
2729

0 commit comments

Comments
 (0)