Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 0 additions & 85 deletions llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,6 @@ class IndVarSimplify {
const SCEV *ExitCount,
PHINode *IndVar, SCEVExpander &Rewriter);

bool sinkUnusedInvariants(Loop *L);

public:
IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
const DataLayout &DL, TargetLibraryInfo *TLI,
Expand Down Expand Up @@ -1074,85 +1072,6 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
return true;
}

//===----------------------------------------------------------------------===//
// sinkUnusedInvariants. A late subpass to cleanup loop preheaders.
//===----------------------------------------------------------------------===//

/// Late cleanup of the loop preheader: when the loop has a single exit block,
/// move loop-invariant values that are defined in the preheader but have no
/// user in the preheader or inside the loop down into that exit block, which
/// reduces register pressure in the loop.
///
/// \returns true if at least one instruction was moved.
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  // Both a unique exit block and a preheader are required; the preheader is
  // only looked up once an exit block is known to exist.
  BasicBlock *ExitBlock = L->getExitBlock();
  BasicBlock *Preheader = ExitBlock ? L->getLoopPreheader() : nullptr;
  if (!Preheader)
    return false;

  // True when the instruction has to stay put regardless of who uses it.
  auto MustStayInPreheader = [](Instruction &Inst) {
    // Side effects need to complete before instructions inside the loop, and
    // a memory read cannot be moved past a loop that may modify memory.
    // Possible undefined behavior is fine: LoopSimplify guarantees that the
    // preheader dominates the exit block.
    if (Inst.mayHaveSideEffects() || Inst.mayReadFromMemory())
      return true;

    // Debug/pseudo instructions and EH pads are left alone.
    if (Inst.isDebugOrPseudoInst() || Inst.isEHPad())
      return true;

    // Static allocas must never leave the entry block, and correctly sinking
    // dynamic allocas requires checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    return isa<AllocaInst>(&Inst);
  };

  // True when some use of the instruction lives in the preheader or in the
  // loop (directly or otherwise). A use by a PHI is attributed to the
  // incoming block through which the value reaches that PHI.
  auto HasUseInOrBeforeLoop = [&](Instruction &Inst) {
    for (Use &U : Inst.uses()) {
      Instruction *UserInst = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = UserInst->getParent();
      if (PHINode *PN = dyn_cast<PHINode>(UserInst))
        UseBB = PN->getIncomingBlock(
            PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
      if (UseBB == Preheader || L->contains(UseBB))
        return true;
    }
    return false;
  };

  bool Changed = false;
  // Visit the preheader bottom-up through an early-increment range, since
  // instructions are moved out of the block during the walk.
  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
    // The block terminator is never a candidate.
    if (&I == Preheader->getTerminator())
      continue;

    // New instructions were inserted at the end of the preheader, so the
    // scan stops once it reaches a PHI.
    if (isa<PHINode>(I))
      break;

    // A def that cannot move, or that is still needed in or before the loop,
    // must remain in the preheader.
    if (MustStayInPreheader(I) || HasUseInOrBeforeLoop(I))
      continue;

    // Otherwise, sink it to the exit block.
    I.moveBefore(ExitBlock->getFirstInsertionPt());
    SE->forgetValue(&I);
    Changed = true;
  }

  return Changed;
}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
auto *OldCond = BI->getCondition();
Expand Down Expand Up @@ -2015,10 +1934,6 @@ bool IndVarSimplify::run(Loop *L) {

// The Rewriter may not be used from this point on.

// Loop-invariant instructions in the preheader that aren't used in the
// loop may be sunk below the loop to reduce register pressure.
Changed |= sinkUnusedInvariants(L);

// rewriteFirstIterationLoopExitValues does not rely on the computation of
// trip count and therefore can further simplify exit values in addition to
// rewriteLoopExitValues.
Expand Down
87 changes: 80 additions & 7 deletions llvm/lib/Transforms/Scalar/LICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,15 @@ static Instruction *cloneInstructionInExitBlock(
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU);

static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, ScalarEvolution *SE);
static void moveInstructionBefore(
Instruction &I, BasicBlock::iterator Dest, ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
MemorySSA::InsertionPlace Point = MemorySSA::BeforeTerminator);

static bool sinkUnusedInvariantsFromPreheaderToExit(
Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE);

static void foreachMemoryAccess(MemorySSA *MSSA, Loop *L,
function_ref<void(Instruction *)> Fn);
Expand Down Expand Up @@ -473,6 +479,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE)
: sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L,
MSSAU, &SafetyInfo, Flags, ORE);

// Sink preheader definitions that are unused inside the loop into the unique
// exit block to reduce register pressure.
Changed |= sinkUnusedInvariantsFromPreheaderToExit(L, AA, &SafetyInfo, MSSAU,
SE, DT, Flags, ORE);

Flags.setIsSink(false);
if (Preheader)
Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L,
Expand Down Expand Up @@ -1460,19 +1472,80 @@ static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo,

// Moves instruction I to the position Dest and keeps the auxiliary analyses
// in step with the move: the loop safety info, MemorySSA (when an access is
// recorded for I) and ScalarEvolution (when SE is non-null).
//
// Point selects where, inside Dest's block, the MemorySSA access for I is
// placed.
//
// NOTE(review): this span is a rendered diff whose +/- markers were stripped,
// so superseded lines appear next to their replacements: the parameter tail
// ending in `ScalarEvolution *SE) {` precedes the one that adds `Point`, and
// the moveToPlace call that hard-codes MemorySSA::BeforeTerminator precedes
// the one that passes `Point`. Confirm against the real file before editing.
static void moveInstructionBefore(Instruction &I, BasicBlock::iterator Dest,
ICFLoopSafetyInfo &SafetyInfo,
MemorySSAUpdater &MSSAU,
ScalarEvolution *SE) {
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
MemorySSA::InsertionPlace Point) {
// Drop I from the safety info, then register it under Dest's block.
SafetyInfo.removeInstruction(&I);
SafetyInfo.insertInstructionTo(&I, Dest->getParent());
// Perform the actual IR move.
I.moveBefore(*Dest->getParent(), Dest);
// getMemoryAccess may yield null (hence cast_or_null); only an existing
// access is relocated into Dest's block.
if (MemoryUseOrDef *OldMemAcc = cast_or_null<MemoryUseOrDef>(
MSSAU.getMemorySSA()->getMemoryAccess(&I)))
MSSAU.moveToPlace(OldMemAcc, Dest->getParent(),
MemorySSA::BeforeTerminator);
MSSAU.moveToPlace(OldMemAcc, Dest->getParent(), Point);
// SE is optional; when present, have it forget the block and loop
// dispositions it holds for I.
if (SE)
SE->forgetBlockAndLoopDispositions(&I);
}

// When the loop has a single exit block, sink loop-invariant values that are
// defined in the preheader but used neither there nor inside the loop into
// that exit block, to reduce register pressure in the loop.
//
// Returns true if at least one instruction was moved. ORE is accepted but not
// consulted here: canSinkOrHoistInst is handed nullptr as its last argument.
static bool sinkUnusedInvariantsFromPreheaderToExit(
    Loop *L, AAResults *AA, ICFLoopSafetyInfo *SafetyInfo,
    MemorySSAUpdater &MSSAU, ScalarEvolution *SE, DominatorTree *DT,
    SinkAndHoistLICMFlags &SinkFlags, OptimizationRemarkEmitter *ORE) {
  // Both a unique exit block and a preheader are required; the preheader is
  // only looked up once an exit block is known to exist.
  BasicBlock *ExitBlock = L->getExitBlock();
  BasicBlock *Preheader = ExitBlock ? L->getLoopPreheader() : nullptr;
  if (!Preheader)
    return false;

  // True when some use of the instruction lives in the preheader or in the
  // loop (directly or otherwise). A use by a PHI is attributed to the
  // incoming block through which the value reaches that PHI.
  auto HasUseInOrBeforeLoop = [&](Instruction &Inst) {
    for (Use &U : Inst.uses()) {
      auto *UserInst = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = UserInst->getParent();
      if (auto *PN = dyn_cast<PHINode>(UserInst))
        UseBB = PN->getIncomingBlock(U);
      if (UseBB == Preheader || L->contains(UseBB))
        return true;
    }
    return false;
  };

  bool Changed = false;

  // Visit the preheader bottom-up through an early-increment range, since
  // instructions are moved out of the block during the walk.
  for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) {
    // The block terminator is never a candidate.
    if (&I == Preheader->getTerminator())
      continue;

    // New instructions were inserted at the end of the preheader, so the
    // scan stops once it reaches a PHI.
    if (isa<PHINode>(I))
      break;

    // Side effects need to complete before instructions inside the loop, so
    // such instructions stay. Possible undefined behavior is fine:
    // LoopSimplify guarantees that the preheader dominates the exit block.
    if (I.mayHaveSideEffects())
      continue;

    // Defer to the shared sink/hoist legality check for everything else.
    if (!canSinkOrHoistInst(I, AA, DT, L, MSSAU, true, SinkFlags, nullptr))
      continue;

    // A def that is still needed in or before the loop stays in the
    // preheader.
    if (HasUseInOrBeforeLoop(I))
      continue;

    // Place the instruction at the exit block's first insertion point, and
    // its MemorySSA access (if any) at the beginning of that block.
    moveInstructionBefore(I, ExitBlock->getFirstInsertionPt(), *SafetyInfo,
                          MSSAU, SE, MemorySSA::Beginning);
    Changed = true;
  }

  return Changed;
}

static Instruction *sinkThroughTriviallyReplaceablePHI(
PHINode *TPN, Instruction *I, LoopInfo *LI,
SmallDenseMap<BasicBlock *, Instruction *, 32> &SunkCopies,
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,10 @@ define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %
}

; CHECK-LABEL: {{^}}excess_soft_clause_reg_pressure:
; GFX908: NumSgprs: 64
; GFX908-GCNTRACKERS: NumSgprs: 64
; GFX908: NumSgprs: 56
; GFX908-GCNTRACKERS: NumSgprs: 56
; GFX908: NumVgprs: 43
; GFX908-GCNTRACKERS: NumVgprs: 39
; GFX908-GCNTRACKERS: NumVgprs: 40
; GFX908: Occupancy: 5
; GFX908-GCNTRACKERS: Occupancy: 6

Expand Down
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@
; GFX9-DAG: s_mov_b32 s[[DESC3:[0-9]+]], 0xe00000

; OFFREG is offset system SGPR
; GCN: buffer_store_dword {{v[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Spill
; GCN: buffer_load_dword v{{[0-9]+}}, off, s[[[DESC0]]:[[DESC3]]], 0 offset:{{[0-9]+}} ; 4-byte Folded Reload
; GCN: NumVgprs: 256
; GCN: ScratchSize: 640

Expand Down
Loading
Loading