Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 25 additions & 26 deletions llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6024,33 +6024,34 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
DeadInsts.emplace_back(OperandIsInstr);
}

// Try to hoist the IVInc to the loop header if all IVInc users are in the
// loop header. This helps the backend generate post-indexed loads/stores
// when the latch block is different from the loop header block.
static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
const LSRUse &LU, Instruction *IVIncInsertPos,
Loop *L) {
// Determine where to insert the transformed IV increment instruction for this
// fixup. By default this is the default insert position, but if this is a
// postincrement opportunity then we try to insert it in the same block as the
// fixup user instruction, as this is needed for a postincrement instruction to
// be generated.
static Instruction *getFixupInsertPos(const TargetTransformInfo &TTI,
const LSRFixup &Fixup, const LSRUse &LU,
Instruction *IVIncInsertPos,
DominatorTree &DT) {
// Only address uses can be postincremented
if (LU.Kind != LSRUse::Address)
return false;

// For now this code performs a conservative optimization and only works for
// the header block. Later we can hoist the IVInc to a block that
// post-dominates all users.
BasicBlock *LHeader = L->getHeader();
if (IVIncInsertPos->getParent() == LHeader)
return false;

if (!Fixup.OperandValToReplace ||
any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) {
Instruction *UI = cast<Instruction>(U);
return UI->getParent() != LHeader;
}))
return false;
return IVIncInsertPos;

// Don't try to postincrement if it's not legal
Instruction *I = Fixup.UserInst;
Type *Ty = I->getType();
return (isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
(isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty));
if (!(isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) &&
!(isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)))
return IVIncInsertPos;

// It's only legal to hoist to the user block if it dominates the default
// insert position.
BasicBlock *HoistBlock = I->getParent();
BasicBlock *IVIncBlock = IVIncInsertPos->getParent();
if (!DT.dominates(I, IVIncBlock))
return IVIncInsertPos;

return HoistBlock->getTerminator();
}

/// Rewrite all the fixup locations with new values, following the chosen
Expand All @@ -6071,9 +6072,7 @@ void LSRInstance::ImplementSolution(
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
Instruction *InsertPos =
canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)
? L->getHeader()->getTerminator()
: IVIncInsertPos;
getFixupInsertPos(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, DT);
Rewriter.setIVIncInsertPos(L, InsertPos);
Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
Changed = true;
Expand Down
9 changes: 3 additions & 6 deletions llvm/test/CodeGen/Thumb2/mve-blockplacement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,8 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) {
; CHECK-NEXT: cset r6, ne
; CHECK-NEXT: strb r6, [r5]
; CHECK-NEXT: add.w r2, r2, #792
; CHECK-NEXT: ldrb r6, [r3]
; CHECK-NEXT: ldrb r6, [r3], #2
; CHECK-NEXT: adds r4, #8
; CHECK-NEXT: adds r3, #2
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: ite ne
; CHECK-NEXT: sxthne r6, r1
Expand Down Expand Up @@ -101,8 +100,7 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) {
; CHECK-NEXT: cset r6, ne
; CHECK-NEXT: adds r4, #8
; CHECK-NEXT: strb r6, [r5]
; CHECK-NEXT: ldrb r6, [r3]
; CHECK-NEXT: adds r3, #2
; CHECK-NEXT: ldrb r6, [r3], #2
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: ite ne
; CHECK-NEXT: sxthne r6, r1
Expand Down Expand Up @@ -134,8 +132,7 @@ define i32 @test(i8 zeroext %var_2, i16 signext %var_15, ptr %arr_60) {
; CHECK-NEXT: cset r4, ne
; CHECK-NEXT: add.w r11, r11, #8
; CHECK-NEXT: strb r4, [r5]
; CHECK-NEXT: ldrb r4, [r3]
; CHECK-NEXT: adds r3, #2
; CHECK-NEXT: ldrb r4, [r3], #2
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: ite ne
; CHECK-NEXT: sxthne r4, r1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,8 +230,6 @@ exit:

; The control-flow before and after the load of qval shouldn't prevent postindex
; addressing from happening.
; FIXME: We choose postindex addressing, but the scevgep is placed in for.inc so
; during codegen we will fail to actually generate a postindex load.
define void @middle_block_load(ptr %p, ptr %q, i64 %n) {
; CHECK-LABEL: define void @middle_block_load(
; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i64 [[N:%.*]]) {
Expand All @@ -254,6 +252,7 @@ define void @middle_block_load(ptr %p, ptr %q, i64 %n) {
; CHECK: [[IF_END]]:
; CHECK-NEXT: [[QVAL:%.*]] = load i32, ptr [[LSR_IV1]], align 4
; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[QVAL]], 0
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: br i1 [[CMP2]], label %[[IF_THEN2:.*]], label %[[IF_ELSE2:.*]]
; CHECK: [[IF_THEN2]]:
; CHECK-NEXT: tail call void @otherfn1()
Expand All @@ -263,7 +262,6 @@ define void @middle_block_load(ptr %p, ptr %q, i64 %n) {
; CHECK-NEXT: br label %[[FOR_INC]]
; CHECK: [[FOR_INC]]:
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
; CHECK-NEXT: br i1 [[CMP3]], label %[[EXIT:.*]], label %[[FOR_BODY]]
; CHECK: [[EXIT]]:
Expand Down