Skip to content

Commit 1aa769a

Browse files
committed
[ARM] Create DeadRegisterPass for ARM
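This pass converts ALU instructions such as ADD, SUB, AND, and EOR (xor) whose destination register is dead, and which are only kept for the flags they set, into the equivalent compare/test instructions (CMN, CMP, TST, TEQ). The goal is the same as the AArch64 dead-register-definitions pass, but ARM has no zero register to write the unused result to, so the instruction is replaced with the compare form that discards the result instead.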
1 parent 1780e16 commit 1aa769a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+4048
-2042
lines changed

llvm/lib/Target/ARM/ARM.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ FunctionPass *createA15SDOptimizerPass();
4141
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
4242
FunctionPass *createARMExpandPseudoPass();
4343
FunctionPass *createARMBranchTargetsPass();
44+
FunctionPass *createARMDeadRegisterDefinitions();
4445
FunctionPass *createARMConstantIslandPass();
4546
FunctionPass *createMLxExpansionPass();
4647
FunctionPass *createThumb2ITBlockPass();
@@ -66,6 +67,7 @@ void initializeARMBlockPlacementPass(PassRegistry &);
6667
void initializeARMBranchTargetsPass(PassRegistry &);
6768
void initializeARMConstantIslandsPass(PassRegistry &);
6869
void initializeARMDAGToDAGISelLegacyPass(PassRegistry &);
70+
void initializeARMDeadRegisterDefinitionsPass(PassRegistry &);
6971
void initializeARMExpandPseudoPass(PassRegistry &);
7072
void initializeARMFixCortexA57AES1742098Pass(PassRegistry &);
7173
void initializeARMLoadStoreOptPass(PassRegistry &);
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
//==-- ARMDeadRegisterDefinitions.cpp - Convert dead dests to compares --==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
/// \file When allowed by the instruction, replace dead definitions with compare
9+
/// instructions.
10+
//===----------------------------------------------------------------------===//
11+
12+
#include "ARM.h"
13+
#include "ARMBaseInstrInfo.h"
14+
#include "ARMBaseRegisterInfo.h"
15+
#include "llvm/ADT/Statistic.h"
16+
#include "llvm/ADT/StringRef.h"
17+
#include "llvm/CodeGen/MachineFunction.h"
18+
#include "llvm/CodeGen/MachineFunctionPass.h"
19+
#include "llvm/CodeGen/MachineInstr.h"
20+
#include "llvm/CodeGen/MachineInstrBuilder.h"
21+
#include "llvm/CodeGen/MachineRegisterInfo.h"
22+
#include "llvm/CodeGen/TargetInstrInfo.h"
23+
#include "llvm/CodeGen/TargetSubtargetInfo.h"
24+
#include "llvm/Pass.h"
25+
#include "llvm/Support/Debug.h"
26+
#include "llvm/Support/raw_ostream.h"
27+
#include <optional>
28+
using namespace llvm;
29+
30+
#define DEBUG_TYPE "arm-dead-defs-to-cmp"
31+
32+
STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
33+
34+
#define ARM_DEAD_REG_DEF_NAME "ARM Convert dead defs to compares"
35+
36+
namespace {
37+
class ARMDeadRegisterDefinitions : public MachineFunctionPass {
38+
private:
39+
const TargetRegisterInfo *TRI;
40+
const MachineRegisterInfo *MRI;
41+
const TargetInstrInfo *TII;
42+
bool Changed;
43+
void processMachineBasicBlock(MachineBasicBlock &MBB);
44+
45+
public:
46+
static char ID; // Pass identification, replacement for typeid.
47+
ARMDeadRegisterDefinitions() : MachineFunctionPass(ID) {}
48+
49+
bool runOnMachineFunction(MachineFunction &F) override;
50+
51+
StringRef getPassName() const override { return ARM_DEAD_REG_DEF_NAME; }
52+
53+
void getAnalysisUsage(AnalysisUsage &AU) const override {
54+
AU.setPreservesCFG();
55+
MachineFunctionPass::getAnalysisUsage(AU);
56+
}
57+
};
58+
char ARMDeadRegisterDefinitions::ID = 0;
59+
} // end anonymous namespace
60+
61+
INITIALIZE_PASS(ARMDeadRegisterDefinitions, "arm-dead-defs-to-cmp",
62+
ARM_DEAD_REG_DEF_NAME, false, false)
63+
64+
static bool usesFrameIndex(const MachineInstr &MI) {
65+
for (const MachineOperand &MO : MI.uses())
66+
if (MO.isFI())
67+
return true;
68+
return false;
69+
}
70+
71+
static std::optional<unsigned> mapToCmpCmnTstTeqOpcode(unsigned Opc) {
72+
switch (Opc) {
73+
// ARM encodings
74+
case ARM::SUBri:
75+
return ARM::CMPri;
76+
case ARM::SUBrr:
77+
return ARM::CMPrr;
78+
case ARM::SUBrsi:
79+
return ARM::CMPrsi;
80+
case ARM::SUBrsr:
81+
return ARM::CMPrsr;
82+
83+
case ARM::ADDri:
84+
return ARM::CMNri;
85+
case ARM::ADDrr:
86+
return ARM::CMNzrr;
87+
case ARM::ADDrsi:
88+
return ARM::CMNzrsi;
89+
case ARM::ADDrsr:
90+
return ARM::CMNzrsr;
91+
92+
case ARM::ANDri:
93+
return ARM::TSTri;
94+
case ARM::ANDrr:
95+
return ARM::TSTrr;
96+
case ARM::ANDrsi:
97+
return ARM::TSTrsi;
98+
case ARM::ANDrsr:
99+
return ARM::TSTrsr;
100+
101+
case ARM::EORri:
102+
return ARM::TEQri;
103+
case ARM::EORrr:
104+
return ARM::TEQrr;
105+
case ARM::EORrsi:
106+
return ARM::TEQrsi;
107+
case ARM::EORrsr:
108+
return ARM::TEQrsr;
109+
110+
// Thumb2 encodings
111+
case ARM::t2SUBri:
112+
return ARM::t2CMPri;
113+
case ARM::t2SUBrr:
114+
return ARM::t2CMPrr;
115+
case ARM::t2SUBrs:
116+
return ARM::t2CMPrs;
117+
118+
case ARM::t2ADDri:
119+
return ARM::t2CMNri;
120+
case ARM::t2ADDrr:
121+
return ARM::t2CMNzrr;
122+
case ARM::t2ADDrs:
123+
return ARM::t2CMNzrs;
124+
125+
case ARM::t2ANDri:
126+
return ARM::t2TSTri;
127+
case ARM::t2ANDrr:
128+
return ARM::t2TSTrr;
129+
case ARM::t2ANDrs:
130+
return ARM::t2TSTrs;
131+
132+
case ARM::t2EORri:
133+
return ARM::t2TEQri;
134+
case ARM::t2EORrr:
135+
return ARM::t2TEQrr;
136+
case ARM::t2EORrs:
137+
return ARM::t2TEQrs;
138+
139+
// Thumb1 limited support
140+
case ARM::tSUBSrr:
141+
return ARM::tCMPr;
142+
case ARM::tSUBSi3:
143+
return ARM::tCMPi8;
144+
case ARM::tSUBSi8:
145+
return ARM::tCMPi8;
146+
case ARM::tAND:
147+
return ARM::tTST;
148+
default:
149+
return std::nullopt;
150+
}
151+
}
152+
153+
/// Appends to \p Dst the explicit, non-def operands of \p Src.
///
/// The operands that follow Src's explicit defs are copied up to, but not
/// including, the first predicate operand. If Src has a predicate, the operand
/// at the predicate index and the one right after it are then appended as
/// well. Anything after those two (presumably an optional flags-def operand on
/// ARM ALU instructions; confirm against the instruction definitions) is
/// intentionally not copied.
///
/// \param Dst Instruction being built; receives the copied operands.
/// \param Src Instruction whose source operands are copied.
static void copyNonDefNonPredOperands(MachineInstr &Dst,
                                      const MachineInstr &Src) {
  const int PredIdx = Src.findFirstPredOperandIdx();
  const unsigned FirstUse = Src.getDesc().getNumDefs();
  // When Src is unpredicated, stop at the last *explicit* operand. Implicit
  // operands belong to Src's opcode; Dst is expected to already carry the
  // implicit operands of its own opcode, so copying them would duplicate them.
  const unsigned LastUse = (PredIdx == -1) ? Src.getNumExplicitOperands()
                                           : static_cast<unsigned>(PredIdx);

  for (unsigned I = FirstUse; I < LastUse; ++I)
    Dst.addOperand(Src.getOperand(I));

  if (PredIdx != -1) {
    Dst.addOperand(Src.getOperand(PredIdx));
    Dst.addOperand(Src.getOperand(PredIdx + 1));
  }
}
167+
168+
/// Scans \p MBB and replaces each CPSR-defining instruction whose explicit
/// virtual-register result is unused, and whose opcode has a compare/test
/// counterpart, with that counterpart. Sets Changed when anything is replaced.
void ARMDeadRegisterDefinitions::processMachineBasicBlock(
    MachineBasicBlock &MBB) {
  // The early-inc range steps past the current instruction before the body
  // runs, so erasing that instruction below does not break the iteration.
  for (MachineInstr &Inst : llvm::make_early_inc_range(MBB)) {
    // Skip frame-index users, and anything that does not define CPSR (only
    // flag-setting variants are candidates).
    if (usesFrameIndex(Inst) || !ARMBaseInstrInfo::isCPSRDefined(Inst))
      continue;

    const unsigned NumDefs = Inst.getDesc().getNumDefs();
    for (unsigned DefIdx = 0; DefIdx != NumDefs; ++DefIdx) {
      MachineOperand &Def = Inst.getOperand(DefIdx);
      if (!Def.isReg() || !Def.isDef())
        continue;

      const Register DefReg = Def.getReg();
      if (!DefReg.isVirtual())
        continue;

      // The result is unused when it is flagged dead or has no non-debug uses.
      const bool ResultUnused = Def.isDead() || MRI->use_nodbg_empty(DefReg);
      if (!ResultUnused)
        continue;

      assert(!Def.isImplicit() && "Unexpected implicit def!");

      // A def tied to a use operand cannot simply be dropped.
      if (Inst.isRegTiedToUseOperand(DefIdx))
        continue;

      const std::optional<unsigned> CmpOpc =
          mapToCmpCmnTstTeqOpcode(Inst.getOpcode());
      if (!CmpOpc)
        continue;

      // Build the compare/test in front of the original, carrying over its
      // source operands, flags and memory operands, then drop the original.
      MachineInstrBuilder Replacement =
          BuildMI(MBB, Inst, Inst.getDebugLoc(), TII->get(*CmpOpc));
      copyNonDefNonPredOperands(*Replacement, Inst);
      Replacement.setMIFlags(Inst.getFlags());
      for (MachineMemOperand *MemOp : Inst.memoperands())
        Replacement.addMemOperand(MemOp);

      Inst.eraseFromParent();
      ++NumDeadDefsReplaced;
      Changed = true;
      break; // Inst is gone; move on to the next instruction.
    }
  }
}
210+
211+
// Scan the function for instructions that have a dead definition of a
212+
// register. Replace that instruction with a compare instruction when possible
213+
bool ARMDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
214+
if (skipFunction(MF.getFunction()))
215+
return false;
216+
217+
TRI = MF.getSubtarget().getRegisterInfo();
218+
TII = MF.getSubtarget().getInstrInfo();
219+
MRI = &MF.getRegInfo();
220+
LLVM_DEBUG(dbgs() << "***** ARMDeadRegisterDefinitions *****\n");
221+
Changed = false;
222+
for (auto &MBB : MF)
223+
processMachineBasicBlock(MBB);
224+
return Changed;
225+
}
226+
227+
/// Factory for the pass; returns a newly allocated ARMDeadRegisterDefinitions
/// instance.
FunctionPass *llvm::createARMDeadRegisterDefinitions() {
  FunctionPass *Pass = new ARMDeadRegisterDefinitions();
  return Pass;
}

llvm/lib/Target/ARM/ARMTargetMachine.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,14 @@ static cl::opt<cl::boolOrDefault>
7979
EnableGlobalMerge("arm-global-merge", cl::Hidden,
8080
cl::desc("Enable the global merge pass"));
8181

82+
static cl::opt<bool> EnableARMDeadRegisterElimination(
83+
"arm-enable-dead-defs", cl::Hidden,
84+
cl::desc("Enable the pass that replaces"
85+
" dead-dest flag-setting ALU"
86+
" instructions with compares/tests"
87+
" pre-RA"),
88+
cl::init(true));
89+
8290
namespace llvm {
8391
void initializeARMExecutionDomainFixPass(PassRegistry&);
8492
}
@@ -510,6 +518,10 @@ bool ARMPassConfig::addGlobalInstructionSelect() {
510518

511519
void ARMPassConfig::addPreRegAlloc() {
512520
if (getOptLevel() != CodeGenOptLevel::None) {
521+
// Replace dead-dest flag-setting ALU with compares/tests pre-RA.
522+
if (EnableARMDeadRegisterElimination)
523+
addPass(createARMDeadRegisterDefinitions());
524+
513525
if (getOptLevel() == CodeGenOptLevel::Aggressive)
514526
addPass(&MachinePipelinerID);
515527

llvm/lib/Target/ARM/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ add_llvm_target(ARMCodeGen
3030
ARMCallLowering.cpp
3131
ARMConstantIslandPass.cpp
3232
ARMConstantPoolValue.cpp
33+
ARMDeadRegisterDefinitionsPass.cpp
3334
ARMExpandPseudoInsts.cpp
3435
ARMFastISel.cpp
3536
ARMFixCortexA57AES1742098Pass.cpp

llvm/test/CodeGen/ARM/O3-pipeline.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
; CHECK-NEXT: Machine code sinking
104104
; CHECK-NEXT: Peephole Optimizations
105105
; CHECK-NEXT: Remove dead machine instructions
106+
; CHECK-NEXT: ARM Convert dead defs to compares
106107
; CHECK-NEXT: MachineDominator Tree Construction
107108
; CHECK-NEXT: Slot index numbering
108109
; CHECK-NEXT: Live Interval Analysis

llvm/test/CodeGen/ARM/addsubcarry-promotion.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
1111
; ARM-LABEL: fn1:
1212
; ARM: @ %bb.0: @ %entry
1313
; ARM-NEXT: rsb r2, r2, #0
14-
; ARM-NEXT: adds r0, r1, r0
14+
; ARM-NEXT: cmn r1, r0
1515
; ARM-NEXT: movw r1, #65535
1616
; ARM-NEXT: sxth r2, r2
1717
; ARM-NEXT: adc r0, r2, #1
@@ -54,7 +54,7 @@ define void @fn1(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 {
5454
; THUMB-LABEL: fn1:
5555
; THUMB: @ %bb.0: @ %entry
5656
; THUMB-NEXT: rsbs r2, r2, #0
57-
; THUMB-NEXT: adds r0, r0, r1
57+
; THUMB-NEXT: cmn r1, r0
5858
; THUMB-NEXT: sxth r2, r2
5959
; THUMB-NEXT: adc r0, r2, #1
6060
; THUMB-NEXT: lsls r0, r0, #16

llvm/test/CodeGen/ARM/addsubo-legalization.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,15 @@ define <2 x i1> @uaddo(ptr %ptr, ptr %ptr2) {
1818
; CHECK-NEXT: vmov r6, r7, d19
1919
; CHECK-NEXT: vmov lr, r12, d16
2020
; CHECK-NEXT: vmov r4, r5, d17
21-
; CHECK-NEXT: subs.w r3, lr, r3
21+
; CHECK-NEXT: cmp lr, r3
2222
; CHECK-NEXT: sbcs.w r2, r12, r2
2323
; CHECK-NEXT: mov.w r2, #0
2424
; CHECK-NEXT: it lo
2525
; CHECK-NEXT: movlo r2, #1
2626
; CHECK-NEXT: cmp r2, #0
2727
; CHECK-NEXT: it ne
2828
; CHECK-NEXT: movne.w r2, #-1
29-
; CHECK-NEXT: subs r3, r4, r6
29+
; CHECK-NEXT: cmp r4, r6
3030
; CHECK-NEXT: sbcs.w r3, r5, r7
3131
; CHECK-NEXT: it lo
3232
; CHECK-NEXT: movlo r1, #1
@@ -57,15 +57,15 @@ define <2 x i1> @usubo(ptr %ptr, ptr %ptr2) {
5757
; CHECK-NEXT: vmov r4, r5, d19
5858
; CHECK-NEXT: vmov r3, r2, d16
5959
; CHECK-NEXT: vmov r6, r7, d17
60-
; CHECK-NEXT: subs.w r3, lr, r3
60+
; CHECK-NEXT: cmp lr, r3
6161
; CHECK-NEXT: sbcs.w r2, r12, r2
6262
; CHECK-NEXT: mov.w r2, #0
6363
; CHECK-NEXT: it lo
6464
; CHECK-NEXT: movlo r2, #1
6565
; CHECK-NEXT: cmp r2, #0
6666
; CHECK-NEXT: it ne
6767
; CHECK-NEXT: movne.w r2, #-1
68-
; CHECK-NEXT: subs r3, r4, r6
68+
; CHECK-NEXT: cmp r4, r6
6969
; CHECK-NEXT: sbcs.w r3, r5, r7
7070
; CHECK-NEXT: it lo
7171
; CHECK-NEXT: movlo r1, #1

0 commit comments

Comments
 (0)