Skip to content

Commit 0ce78d7

Browse files
committed
Cherry-pick PR llvm#157559
1 parent fbb4cb1 commit 0ce78d7

36 files changed

+456
-326
lines changed

llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

Lines changed: 0 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -157,8 +157,6 @@ class IndVarSimplify {
157157
const SCEV *ExitCount,
158158
PHINode *IndVar, SCEVExpander &Rewriter);
159159

160-
bool sinkUnusedInvariants(Loop *L);
161-
162160
public:
163161
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
164162
const DataLayout &DL, TargetLibraryInfo *TLI,
@@ -1074,85 +1072,6 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
10741072
return true;
10751073
}
10761074

1077-
//===----------------------------------------------------------------------===//
1078-
// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
1079-
//===----------------------------------------------------------------------===//
1080-
1081-
/// If there's a single exit block, sink any loop-invariant values that
1082-
/// were defined in the preheader but not used inside the loop into the
1083-
/// exit block to reduce register pressure in the loop.
1084-
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
1085-
BasicBlock *ExitBlock = L->getExitBlock();
1086-
if (!ExitBlock) return false;
1087-
1088-
BasicBlock *Preheader = L->getLoopPreheader();
1089-
if (!Preheader) return false;
1090-
1091-
bool MadeAnyChanges = false;
1092-
for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
1093-
1094-
// Skip BB Terminator.
1095-
if (Preheader->getTerminator() == &I)
1096-
continue;
1097-
1098-
// New instructions were inserted at the end of the preheader.
1099-
if (isa<PHINode>(I))
1100-
break;
1101-
1102-
// Don't move instructions which might have side effects, since the side
1103-
// effects need to complete before instructions inside the loop. Also don't
1104-
// move instructions which might read memory, since the loop may modify
1105-
// memory. Note that it's okay if the instruction might have undefined
1106-
// behavior: LoopSimplify guarantees that the preheader dominates the exit
1107-
// block.
1108-
if (I.mayHaveSideEffects() || I.mayReadFromMemory())
1109-
continue;
1110-
1111-
// Skip debug or pseudo instructions.
1112-
if (I.isDebugOrPseudoInst())
1113-
continue;
1114-
1115-
// Skip eh pad instructions.
1116-
if (I.isEHPad())
1117-
continue;
1118-
1119-
// Don't sink alloca: we never want to sink static alloca's out of the
1120-
// entry block, and correctly sinking dynamic alloca's requires
1121-
// checks for stacksave/stackrestore intrinsics.
1122-
// FIXME: Refactor this check somehow?
1123-
if (isa<AllocaInst>(&I))
1124-
continue;
1125-
1126-
// Determine if there is a use in or before the loop (direct or
1127-
// otherwise).
1128-
bool UsedInLoop = false;
1129-
for (Use &U : I.uses()) {
1130-
Instruction *User = cast<Instruction>(U.getUser());
1131-
BasicBlock *UseBB = User->getParent();
1132-
if (PHINode *P = dyn_cast<PHINode>(User)) {
1133-
unsigned i =
1134-
PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1135-
UseBB = P->getIncomingBlock(i);
1136-
}
1137-
if (UseBB == Preheader || L->contains(UseBB)) {
1138-
UsedInLoop = true;
1139-
break;
1140-
}
1141-
}
1142-
1143-
// If there is, the def must remain in the preheader.
1144-
if (UsedInLoop)
1145-
continue;
1146-
1147-
// Otherwise, sink it to the exit block.
1148-
I.moveBefore(ExitBlock->getFirstInsertionPt());
1149-
SE->forgetValue(&I);
1150-
MadeAnyChanges = true;
1151-
}
1152-
1153-
return MadeAnyChanges;
1154-
}
1155-
11561075
static void replaceExitCond(BranchInst *BI, Value *NewCond,
11571076
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
11581077
auto *OldCond = BI->getCondition();
@@ -2015,10 +1934,6 @@ bool IndVarSimplify::run(Loop *L) {
20151934

20161935
// The Rewriter may not be used from this point on.
20171936

2018-
// Loop-invariant instructions in the preheader that aren't used in the
2019-
// loop may be sunk below the loop to reduce register pressure.
2020-
Changed |= sinkUnusedInvariants(L);
2021-
20221937
// rewriteFirstIterationLoopExitValues does not rely on the computation of
20231938
// trip count and therefore can further simplify exit values in addition to
20241939
// rewriteLoopExitValues.

llvm/lib/Transforms/Scalar/LICM.cpp

Lines changed: 80 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,15 @@ static Instruction *cloneInstructionInExitBlock(
213213
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
214214
MemorySSAUpdater &MSSAU);
215215

216-
static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
217-
ICFLoopSafetyInfo &SafetyInfo,
218-
MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
216+
static void moveInstructionBefore(
217+
Instruction &I, BasicBlock::iterator Dest, ICFLoopSafetyInfo &SafetyInfo,
218+
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
219+
MemorySSA::InsertionPlace Point = MemorySSA::BeforeTerminator);
220+
221+
static bool sinkUnusedInvariantsFromPreheaderToExit(
222+
Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
223+
MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
224+
SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE);
219225

220226
static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
221227
function_ref<void(Instruction *)> Fn);
@@ -473,6 +479,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
473479
TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
474480
: sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
475481
MSSAU, &SafetyInfo, Flags, ORE);
482+
483+
// Sink preheader definitions that are unused inside the loop into the unique
484+
// exit block to reduce register pressure in the loop.
485+
Changed |= sinkUnusedInvariantsFromPreheaderToExit(L, AA, &SafetyInfo, MSSAU,
486+
SE, DT, Flags, ORE);
487+
476488
Flags.setIsSink(false);
477489
if (Preheader)
478490
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
@@ -1460,19 +1472,80 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
14601472

14611473
static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
14621474
ICFLoopSafetyInfo &SafetyInfo,
1463-
MemorySSAUpdater &MSSAU,
1464-
ScalarEvolution *SE) {
1475+
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
1476+
MemorySSA::InsertionPlace Point) {
14651477
SafetyInfo.removeInstruction(&I);
14661478
SafetyInfo.insertInstructionTo(&I, Dest->getParent());
14671479
I.moveBefore(*Dest->getParent(), Dest);
14681480
if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
14691481
MSSAU.getMemorySSA()->getMemoryAccess(&I)))
1470-
MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
1471-
MemorySSA::BeforeTerminator);
1482+
MSSAU.moveToPlace(OldMemAcc, Dest->getParent(), Point);
14721483
if (SE)
14731484
SE->forgetBlockAndLoopDispositions(&I);
14741485
}
14751486

1487+
// If there's a single exit block, sink any loop-invariant values that were
1488+
// defined in the preheader but not used inside the loop into the exit block
1489+
// to reduce register pressure in the loop.
//
// Returns true if at least one instruction was moved. Returns false without
// touching anything when L->getExitBlock() or L->getLoopPreheader() is null.
//
// NOTE(review): ORE is accepted as a parameter but is never used in this
// body; the canSinkOrHoistInst call below passes nullptr as its last argument
// instead. Confirm that this is intentional.
1490+
static bool sinkUnusedInvariantsFromPreheaderToExit(
1491+
Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
1492+
MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
1493+
SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE) {
1494+
BasicBlock *ExitBlock = L->getExitBlock();
1495+
if (!ExitBlock)
1496+
return false;
1497+
1498+
BasicBlock *Preheader = L->getLoopPreheader();
1499+
if (!Preheader)
1500+
return false;
1501+
1502+
bool MadeAnyChanges = false;
1503+
1504+
// Walk the preheader bottom-up. The early-inc range keeps the iteration
// valid even though the current instruction may be moved out of the block.
for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
1505+
1506+
// The block terminator has to stay in the preheader; skip it.
1507+
if (Preheader->getTerminator() == &I)
1508+
continue;
1509+
1510+
// Stop the reverse walk at the first PHI node that is reached. (Original
// rationale: new instructions were inserted at the end of the preheader.)
1511+
if (isa<PHINode>(I))
1512+
break;
1513+
1514+
// Don't move instructions which might have side effects, since the side
1515+
// effects need to complete before instructions inside the loop. Note that
1516+
// it's okay if the instruction might have undefined behavior: LoopSimplify
1517+
// guarantees that the preheader dominates the exit block.
1518+
if (I.mayHaveSideEffects())
1519+
continue;
1520+
1521+
// Defer the remaining legality checks to canSinkOrHoistInst and leave I in
// place if it returns false. NOTE(review): the exact criteria live in that
// helper, which is not visible here.
if (!canSinkOrHoistInst(I, AA, DT, L, MSSAU, true, SinkFlags, nullptr))
1522+
continue;
1523+
1524+
// Determine whether any use of I is located in the preheader or inside the
1525+
// loop (direct or otherwise).
1526+
bool UsedInLoopOrPreheader = false;
1527+
for (Use &U : I.uses()) {
1528+
auto *UserI = cast<Instruction>(U.getUser());
1529+
BasicBlock *UseBB = UserI->getParent();
1530+
// A use by a PHI node is attributed to the incoming block for that operand
// rather than to the block that contains the PHI itself.
if (auto *PN = dyn_cast<PHINode>(UserI)) {
1531+
UseBB = PN->getIncomingBlock(U);
1532+
}
1533+
if (UseBB == Preheader || L->contains(UseBB)) {
1534+
UsedInLoopOrPreheader = true;
1535+
break;
1536+
}
1537+
}
1538+
// If there is such a use, the definition must remain in the preheader.
if (UsedInLoopOrPreheader)
1539+
continue;
1540+
1541+
// Otherwise move I to the first insertion point of the exit block.
// MemorySSA::Beginning is passed explicitly so that moveInstructionBefore
// places any MemorySSA access of I at the beginning of the exit block
// instead of at its default MemorySSA::BeforeTerminator position.
moveInstructionBefore(I, ExitBlock->getFirstInsertionPt(), *SafetyInfo,
1542+
MSSAU, SE, MemorySSA::Beginning);
1543+
MadeAnyChanges = true;
1544+
}
1545+
1546+
return MadeAnyChanges;
1547+
}
1548+
14761549
static Instruction *sinkThroughTriviallyReplaceablePHI(
14771550
PHINode *TPN, Instruction *I, LoopInfo *LI,
14781551
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,

llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,10 +73,10 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %
7373
}
7474

7575
; CHECK-LABEL: {{^}}excess_soft_clause_reg_pressure:
76-
; GFX908: NumSgprs: 64
77-
; GFX908-GCNTRACKERS: NumSgprs: 64
76+
; GFX908: NumSgprs: 56
77+
; GFX908-GCNTRACKERS: NumSgprs: 56
7878
; GFX908: NumVgprs: 43
79-
; GFX908-GCNTRACKERS: NumVgprs: 39
79+
; GFX908-GCNTRACKERS: NumVgprs: 40
8080
; GFX908: Occupancy: 5
8181
; GFX908-GCNTRACKERS: Occupancy: 6
8282

llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@
2222
; GFX9-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe00000
2323

2424
; OFFREG is offset system SGPR
25-
; GCN: buffer_store_dword {{v[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
26-
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Reload
2725
; GCN: NumVgprs: 256
2826
; GCN: ScratchSize: 640
2927

0 commit comments

Comments
 (0)