Skip to content

Commit 8086280

Browse files
committed
Remove inefficient sort, add a parameter to limit the number of instructions checked.
1 parent c0ef5b7 commit 8086280

File tree

2 files changed

+67
-62
lines changed

2 files changed

+67
-62
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 34 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ static cl::opt<unsigned>
8585
BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
8686
cl::desc("Restrict range of B instructions (DEBUG)"));
8787

88+
static cl::opt<unsigned> GatherOptSearchLimit(
89+
"aarch64-search-limit", cl::Hidden, cl::init(2048),
90+
cl::desc("Restrict range of instructions to search for the "
91+
"machine-combiner gather pattern optimization"));
92+
8893
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
8994
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
9095
AArch64::CATCHRET),
@@ -7414,39 +7419,6 @@ static bool getMiscPatterns(MachineInstr &Root,
74147419
return false;
74157420
}
74167421

7417-
/// Collect all loads, stores and calls between `First` and `Last`.
7418-
/// `First` and `Last` must be within the same MBB.
7419-
static void getMemOps(const MachineInstr *First, const MachineInstr *Last,
7420-
SmallVectorImpl<const MachineInstr *> &MemOps) {
7421-
if (!First || !Last || First == Last)
7422-
return;
7423-
7424-
// Both instructions must be in the same basic block.
7425-
if (First->getParent() != Last->getParent())
7426-
return;
7427-
7428-
const MachineBasicBlock *MBB = First->getParent();
7429-
auto InstrIt = First->getIterator();
7430-
auto LastIt = Last->getIterator();
7431-
MemOps.push_back(First);
7432-
7433-
for (; InstrIt != MBB->end(); ++InstrIt) {
7434-
if (InstrIt == LastIt)
7435-
break;
7436-
7437-
// Check for stores or calls that could interfere
7438-
if (InstrIt->mayLoadOrStore() || InstrIt->isCall())
7439-
MemOps.push_back(&*InstrIt);
7440-
}
7441-
7442-
// If we have not iterated to `Last` the instructions must have not been
7443-
// ordered correctly.
7444-
assert(&*InstrIt == Last &&
7445-
"Got bad machine instructions, First should come before Last!");
7446-
7447-
MemOps.push_back(Last);
7448-
}
7449-
74507422
/// Check if a given MachineInstr `MIa` may alias with any of the instructions
74517423
/// in `MemInstrs`.
74527424
static bool mayAlias(const MachineInstr &MIa,
@@ -7505,13 +7477,13 @@ static bool getGatherPattern(MachineInstr &Root,
75057477
auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
75067478
auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
75077479
SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7508-
SmallSet<const MachineInstr *, 16> LoadInstrs = {};
7480+
SmallVector<const MachineInstr *, 16> LoadInstrs = {};
75097481
while (!RemainingLanes.empty() && CurrInstr &&
75107482
CurrInstr->getOpcode() == LoadLaneOpCode &&
75117483
MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
75127484
CurrInstr->getNumOperands() == 4) {
75137485
RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7514-
LoadInstrs.insert(CurrInstr);
7486+
LoadInstrs.push_back(CurrInstr);
75157487
CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
75167488
}
75177489

@@ -7533,38 +7505,38 @@ static bool getGatherPattern(MachineInstr &Root,
75337505
if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
75347506
return false;
75357507

7536-
LoadInstrs.insert(MRI.getUniqueVRegDef(Lane0LoadReg));
7537-
7538-
// Check for intervening stores or calls between the first and last load.
7539-
// Sort load instructions by program order.
7540-
SmallVector<const MachineInstr *, 16> SortedLoads(LoadInstrs.begin(),
7541-
LoadInstrs.end());
7542-
llvm::sort(SortedLoads, [](const MachineInstr *A, const MachineInstr *B) {
7543-
if (A->getParent() != B->getParent()) {
7544-
// If in different blocks, this shouldn't happen for gather patterns.
7545-
return false;
7546-
}
7547-
// Compare positions within the same basic block.
7548-
for (const MachineInstr &MI : *A->getParent()) {
7549-
if (&MI == A)
7550-
return true;
7551-
if (&MI == B)
7552-
return false;
7553-
}
7554-
return false;
7555-
});
7556-
7557-
const MachineInstr *FirstLoad = SortedLoads.front();
7558-
const MachineInstr *LastLoad = SortedLoads.back();
7559-
SmallVector<const MachineInstr *, 16> MemOps = {};
7560-
getMemOps(FirstLoad, LastLoad, MemOps);
7508+
LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
75617509

75627510
// If there is any chance of aliasing, do not apply the pattern.
7563-
for (const MachineInstr *MemInstr : MemOps) {
7564-
if (mayAlias(*MemInstr, MemOps, nullptr))
7511+
// Walk backward through the MBB starting from Root.
7512+
// Exit early if we've encountered all load instructions or hit the search
7513+
// limit.
7514+
auto MBBItr = Root.getIterator();
7515+
unsigned RemainingSteps = GatherOptSearchLimit;
7516+
SmallSet<const MachineInstr *, 16> RemainingLoadInstrs;
7517+
RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
7518+
const MachineBasicBlock *MBB = Root.getParent();
7519+
7520+
for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
7521+
!RemainingLoadInstrs.empty();
7522+
--MBBItr, --RemainingSteps) {
7523+
const MachineInstr &CurrInstr = *MBBItr;
7524+
7525+
// Remove this instruction from remaining loads if it's one we're tracking.
7526+
RemainingLoadInstrs.erase(&CurrInstr);
7527+
7528+
// Check for potential aliasing with any of the load instructions to
7529+
// optimize.
7530+
if ((CurrInstr.mayLoadOrStore() || CurrInstr.isCall()) &&
7531+
mayAlias(CurrInstr, LoadInstrs, nullptr))
75657532
return false;
75667533
}
75677534

7535+
// If we hit the search limit without finding all load instructions,
7536+
// don't match the pattern.
7537+
if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
7538+
return false;
7539+
75687540
switch (NumLanes) {
75697541
case 4:
75707542
Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# RUN: llc -run-pass=machine-combiner -aarch64-search-limit=2 -mcpu=neoverse-n2 -mtriple=aarch64-none-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s
2+
3+
---
4+
name: negative_pattern_mbb_too_large
5+
body: |
6+
bb.0.entry:
7+
liveins: $x0, $x1, $x2, $x3, $x4
8+
9+
; CHECK-LABEL: name: negative_pattern_mbb_too_large
10+
; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
11+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
12+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64common = COPY $x2
13+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64common = COPY $x3
14+
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64common = COPY $x4
15+
; CHECK-NEXT: [[LD_i32:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[COPY1]], 0, 1
16+
; CHECK-NEXT: [[FIRST_REG:%[0-9]+]]:fpr128 = SUBREG_TO_REG 0, killed [[LD_i32]], %subreg.ssub
17+
; CHECK-NEXT: [[LD_LANE_1:%[0-9]+]]:fpr128 = LD1i32 [[FIRST_REG]], 1, killed [[COPY2]]
18+
; CHECK-NEXT: [[LD_LANE_2:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_1]], 2, killed [[COPY3]]
19+
; CHECK-NEXT: [[LD_LANE_3:%[0-9]+]]:fpr128 = LD1i32 [[LD_LANE_2]], 3, killed [[COPY4]]
20+
; CHECK-NEXT: $q0 = COPY [[LD_LANE_3]]
21+
; CHECK-NEXT: RET_ReallyLR implicit $q0
22+
%0:gpr64common = COPY $x0
23+
%1:gpr64common = COPY $x1
24+
%2:gpr64common = COPY $x2
25+
%3:gpr64common = COPY $x3
26+
%4:gpr64common = COPY $x4
27+
%5:fpr32 = LDRSroX %0, killed %1, 0, 1
28+
%6:fpr128 = SUBREG_TO_REG 0, killed %5, %subreg.ssub
29+
%7:fpr128 = LD1i32 %6, 1, killed %2
30+
%8:fpr128 = LD1i32 %7, 2, killed %3
31+
%9:fpr128 = LD1i32 %8, 3, killed %4
32+
$q0 = COPY %9
33+
RET_ReallyLR implicit $q0

0 commit comments

Comments
 (0)