Skip to content

Commit ecdf7ed

Browse files
committed
[AArch64LoadStoreOpt] Look for the register update instruction (to merge with the memory instruction into pre/post-increment form) not only inside a single MBB but also along a downward control-flow path without side entries, such that BaseReg is live along the path but not at its exits. The regression test is updated accordingly.
1 parent 1a7b7e2 commit ecdf7ed

File tree

2 files changed

+57
-25
lines changed

2 files changed

+57
-25
lines changed

llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp

Lines changed: 56 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2529,30 +2529,63 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
25292529
return E;
25302530
}
25312531

2532-
for (unsigned Count = 0; MBBI != E && Count < Limit;
2533-
MBBI = next_nodbg(MBBI, E)) {
2534-
MachineInstr &MI = *MBBI;
2535-
2536-
// Don't count transient instructions towards the search limit since there
2537-
// may be different numbers of them if e.g. debug information is present.
2538-
if (!MI.isTransient())
2539-
++Count;
2540-
2541-
// If we found a match, return it.
2542-
if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
2543-
return MBBI;
2544-
2545-
// Update the status of what the instruction clobbered and used.
2546-
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
2532+
MachineBasicBlock *CurMBB = I->getParent();
2533+
// choice of next block to visit is liveins-based
2534+
bool VisitSucc = CurMBB->getParent()->getRegInfo().tracksLiveness();
2535+
2536+
while (true) {
2537+
MachineBasicBlock::iterator CurEnd = CurMBB->end();
2538+
2539+
for (unsigned Count = 0; MBBI != CurEnd && Count < Limit;
2540+
MBBI = next_nodbg(MBBI, CurEnd)) {
2541+
MachineInstr &MI = *MBBI;
2542+
2543+
// Don't count transient instructions towards the search limit since there
2544+
// may be different numbers of them if e.g. debug information is present.
2545+
if (!MI.isTransient())
2546+
++Count;
2547+
2548+
// If we found a match, return it.
2549+
if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
2550+
return MBBI;
2551+
2552+
// Update the status of what the instruction clobbered and used.
2553+
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits,
2554+
TRI);
2555+
2556+
// Otherwise, if the base register is used or modified, we have no match,
2557+
// so return early. If we are optimizing SP, do not allow instructions
2558+
// that may load or store in between the load and the optimized value
2559+
// update.
2560+
if (!ModifiedRegUnits.available(BaseReg) ||
2561+
!UsedRegUnits.available(BaseReg) ||
2562+
(BaseRegSP && MBBI->mayLoadOrStore()))
2563+
return E;
2564+
}
25472565

2548-
// Otherwise, if the base register is used or modified, we have no match, so
2549-
// return early.
2550-
// If we are optimizing SP, do not allow instructions that may load or store
2551-
// in between the load and the optimized value update.
2552-
if (!ModifiedRegUnits.available(BaseReg) ||
2553-
!UsedRegUnits.available(BaseReg) ||
2554-
(BaseRegSP && MBBI->mayLoadOrStore()))
2555-
return E;
2566+
if (VisitSucc) {
2567+
// Try to go downward to successors along a CF path w/o side enters
2568+
// such that BaseReg is alive along it but not at its exits
2569+
MachineBasicBlock *SuccToVisit = nullptr;
2570+
unsigned LiveSuccCount = 0;
2571+
for (MachineBasicBlock *Succ : CurMBB->successors()) {
2572+
if (Succ->isLiveIn(BaseReg)) {
2573+
if (LiveSuccCount++) {
2574+
return E;
2575+
}
2576+
if (Succ->pred_size() == 1) {
2577+
SuccToVisit = Succ;
2578+
}
2579+
}
2580+
}
2581+
if (!SuccToVisit) {
2582+
break;
2583+
}
2584+
CurMBB = SuccToVisit;
2585+
MBBI = CurMBB->begin();
2586+
} else {
2587+
break;
2588+
}
25562589
}
25572590
return E;
25582591
}

llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,12 +131,11 @@ define i32 @negative_test_type_is_struct(i32 %c, ptr %a, ptr %b) {
131131
; CHECK-NEXT: mov w8, w0
132132
; CHECK-NEXT: .LBB2_2: // %for.body
133133
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
134-
; CHECK-NEXT: ldr w9, [x1]
134+
; CHECK-NEXT: ldr w9, [x1], #4
135135
; CHECK-NEXT: cbnz w9, .LBB2_5
136136
; CHECK-NEXT: // %bb.3: // %for.cond
137137
; CHECK-NEXT: // in Loop: Header=BB2_2 Depth=1
138138
; CHECK-NEXT: subs x8, x8, #1
139-
; CHECK-NEXT: add x1, x1, #4
140139
; CHECK-NEXT: b.ne .LBB2_2
141140
; CHECK-NEXT: .LBB2_4:
142141
; CHECK-NEXT: mov w0, wzr

0 commit comments

Comments
 (0)