Skip to content

Commit d1115c2

Browse files
author
esmeyi
committed
[PowerPC] Optimize compare by using record form in post-RA.
Summary: We currently optimize comparisons only while the function is in SSA form, so we miss optimization opportunities where the input of the comparison is lowered from a COPY in post-RA, i.e., ExpandPostRA::LowerCopy is called after PPCInstrInfo::optimizeCompareInstr. This patch optimizes comparisons in post-RA; only comparisons against zero can be handled. D131374 converts a comparison and its user to a compare against zero with the appropriate predicate on the branch, which creates additional opportunities for this patch. Reviewed By: shchenz, lkail Differential Revision: https://reviews.llvm.org/D131873
1 parent cb33ef7 commit d1115c2

File tree

5 files changed

+242
-9
lines changed

5 files changed

+242
-9
lines changed

llvm/lib/Target/PowerPC/PPCInstrInfo.cpp

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2768,6 +2768,85 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
27682768
return true;
27692769
}
27702770

2771+
/// Post-RA counterpart of optimizeCompareInstr: try to make a compare against
/// zero (\p CmpMI) redundant by rewriting the instruction that defines its
/// source register into its record form, which implicitly defines CR0.
///
/// \param CmpMI the compare instruction to analyze.
/// \returns true if the defining instruction was rewritten. \p CmpMI itself is
/// not erased here; it is only reported as dead in the debug output, so the
/// caller is responsible for deleting it. Returns false, leaving the code
/// unchanged, whenever any of the checks below fails.
bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
2772+
MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
2773+
// This transformation only applies once the function has left SSA form.
if (MRI->isSSA())
2774+
return false;
2775+
2776+
Register SrcReg, SrcReg2;
2777+
int64_t CmpMask, CmpValue;
2778+
if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
2779+
return false;
2780+
2781+
// Try to optimize the comparison against 0.
2782+
if (CmpValue || !CmpMask || SrcReg2)
2783+
return false;
2784+
2785+
// The record forms set the condition register based on a signed comparison
2786+
// with zero (see comments in optimizeCompareInstr). Since we can't do the
2787+
// equality checks in post-RA, we are more restricted on an unsigned
2788+
// comparison.
2789+
unsigned Opc = CmpMI.getOpcode();
2790+
if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)
2791+
return false;
2792+
2793+
// The record forms are always based on a 64-bit comparison on PPC64
2794+
// (similarly, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit
2795+
// comparison. Since we can't do the equality checks in post-RA, we bail out
2796+
// in that case.
2797+
if (Subtarget.isPPC64() && Opc == PPC::CMPWI)
2798+
return false;
2799+
2800+
// CmpMI can't be deleted if it has an implicit def.
2801+
if (CmpMI.hasImplicitDef())
2802+
return false;
2803+
2804+
// NOTE(review): getDefMIPostRA is defined elsewhere; presumably it locates
// the instruction defining SrcReg ahead of CmpMI and reports through
// SrcRegHasOtherUse whether SrcReg is used elsewhere -- confirm.
bool SrcRegHasOtherUse = false;
2805+
MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);
2806+
if (!SrcMI || !SrcMI->definesRegister(SrcReg))
2807+
return false;
2808+
2809+
// The rewritten instruction will define CR0 (see the assert below), so the
// compare can only be replaced if CR0 is the register it writes.
MachineOperand RegMO = CmpMI.getOperand(0);
2810+
Register CRReg = RegMO.getReg();
2811+
if (CRReg != PPC::CR0)
2812+
return false;
2813+
2814+
// Make sure there is no def/use of CRReg between SrcMI and CmpMI.
2815+
bool SeenUseOfCRReg = false;
2816+
bool IsCRRegKilled = false;
2817+
if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,
2818+
SeenUseOfCRReg) ||
2819+
SrcMI->definesRegister(CRReg) || SeenUseOfCRReg)
2820+
return false;
2821+
2822+
// A result of -1 is treated as "no record-form opcode for SrcMI".
int SrcMIOpc = SrcMI->getOpcode();
2823+
int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);
2824+
if (NewOpC == -1)
2825+
return false;
2826+
2827+
LLVM_DEBUG(dbgs() << "Replace Instr: ");
2828+
LLVM_DEBUG(SrcMI->dump());
2829+
2830+
// Switch SrcMI to the record-form opcode in place, add the implicit def of
// CRReg, and clear any dead flag on that def since the value is consumed by
// the former users of CmpMI's result.
const MCInstrDesc &NewDesc = get(NewOpC);
2831+
SrcMI->setDesc(NewDesc);
2832+
MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)
2833+
.addReg(CRReg, RegState::ImplicitDefine);
2834+
SrcMI->clearRegisterDeads(CRReg);
2835+
2836+
// Fix up killed/dead flag for SrcReg after transformation.
2837+
if (SrcRegHasOtherUse || CmpMI.getOperand(1).isKill())
2838+
fixupIsDeadOrKill(SrcMI, &CmpMI, SrcReg);
2839+
2840+
assert(SrcMI->definesRegister(PPC::CR0) &&
2841+
"Record-form instruction does not define cr0?");
2842+
2843+
LLVM_DEBUG(dbgs() << "with: ");
2844+
LLVM_DEBUG(SrcMI->dump());
2845+
LLVM_DEBUG(dbgs() << "Delete dead instruction: ");
2846+
LLVM_DEBUG(CmpMI.dump());
2847+
return true;
2848+
}
2849+
27712850
bool PPCInstrInfo::getMemOperandsWithOffsetWidth(
27722851
const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
27732852
int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
@@ -4427,7 +4506,7 @@ bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,
44274506
bool PPCInstrInfo::isRegElgibleForForwarding(
44284507
const MachineOperand &RegMO, const MachineInstr &DefMI,
44294508
const MachineInstr &MI, bool KillDefMI,
4430-
bool &IsFwdFeederRegKilled) const {
4509+
bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
44314510
// x = addi y, imm
44324511
// ...
44334512
// z = lfdx 0, x -> z = lfd imm(y)
@@ -4449,6 +4528,8 @@ bool PPCInstrInfo::isRegElgibleForForwarding(
44494528
return false;
44504529
else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
44514530
IsFwdFeederRegKilled = true;
4531+
if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
4532+
SeenIntermediateUse = true;
44524533
// Made it to DefMI without encountering a clobber.
44534534
if ((&*It) == &DefMI)
44544535
break;
@@ -4888,9 +4969,10 @@ bool PPCInstrInfo::transformToImmFormFedByAdd(
48884969
return false;
48894970

48904971
bool IsFwdFeederRegKilled = false;
4972+
bool SeenIntermediateUse = false;
48914973
// Check if the RegMO can be forwarded to MI.
48924974
if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,
4893-
IsFwdFeederRegKilled))
4975+
IsFwdFeederRegKilled, SeenIntermediateUse))
48944976
return false;
48954977

48964978
// Get killed info in case fixup needed after transformation.

llvm/lib/Target/PowerPC/PPCInstrInfo.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
251251
bool isRegElgibleForForwarding(const MachineOperand &RegMO,
252252
const MachineInstr &DefMI,
253253
const MachineInstr &MI, bool KillDefMI,
254-
bool &IsFwdFeederRegKilled) const;
254+
bool &IsFwdFeederRegKilled,
255+
bool &SeenIntermediateUse) const;
255256
unsigned getSpillTarget() const;
256257
const unsigned *getStoreOpcodesForSpillArray() const;
257258
const unsigned *getLoadOpcodesForSpillArray() const;
@@ -644,6 +645,8 @@ class PPCInstrInfo : public PPCGenInstrInfo {
644645
int64_t &Offset, unsigned &Width,
645646
const TargetRegisterInfo *TRI) const;
646647

648+
bool optimizeCmpPostRA(MachineInstr &MI) const;
649+
647650
/// Get the base operand and byte offset of an instruction that reads/writes
648651
/// memory.
649652
bool getMemOperandsWithOffsetWidth(

llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ STATISTIC(NumberOfSelfCopies,
3838
"Number of self copy instructions eliminated");
3939
STATISTIC(NumFrameOffFoldInPreEmit,
4040
"Number of folding frame offset by using r+r in pre-emit peephole");
41+
STATISTIC(NumCmpsInPreEmit,
42+
"Number of compares eliminated in pre-emit peephole");
4143

4244
static cl::opt<bool>
4345
EnablePCRelLinkerOpt("ppc-pcrel-linker-opt", cl::Hidden, cl::init(true),
@@ -508,6 +510,13 @@ static bool hasPCRelativeForm(MachineInstr &Use) {
508510
LLVM_DEBUG(dbgs() << "Frame offset folding by using index form: ");
509511
LLVM_DEBUG(MI.dump());
510512
}
513+
if (TII->optimizeCmpPostRA(MI)) {
514+
Changed = true;
515+
NumCmpsInPreEmit++;
516+
LLVM_DEBUG(dbgs() << "Optimize compare by using record form: ");
517+
LLVM_DEBUG(MI.dump());
518+
InstrsToErase.push_back(&MI);
519+
}
511520
}
512521

513522
// Eliminate conditional branch based on a constant CR bit by
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# RUN: llc -mtriple=powerpc64le-linux-gnu -stop-after ppc-pre-emit-peephole %s -o - -verify-machineinstrs | FileCheck %s
2+
3+
---
4+
name: test1
5+
# The cmp instr is optimized with the record form.
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0.entry:
9+
successors: %bb.1(0x30000000), %bb.2(0x50000000)
10+
liveins: $x3, $x4
11+
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
12+
renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3
13+
; CHECK-LABEL: name: test1
14+
; CHECK: renamable $x3 = OR8_rec renamable $x3, killed renamable $x4, implicit-def $cr0
15+
; CHECK-NOT: CMPDI
16+
BCC 68, killed renamable $cr0, %bb.2
17+
18+
bb.1:
19+
$x3 = LI8 102
20+
BLR8 implicit $lr8, implicit $rm, implicit $x3
21+
22+
bb.2:
23+
$x3 = LI8 116
24+
BLR8 implicit $lr8, implicit $rm, implicit $x3
25+
...
26+
27+
---
28+
name: test2
29+
# The imm of the comparison instr isn't 0.
30+
tracksRegLiveness: true
31+
body: |
32+
bb.0.entry:
33+
successors: %bb.1(0x30000000), %bb.2(0x50000000)
34+
liveins: $x3, $x4
35+
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
36+
renamable $cr0 = CMPDI renamable $x3, 2, implicit killed $x3
37+
; CHECK-LABEL: name: test2
38+
; CHECK: CMPDI
39+
BCC 68, killed renamable $cr0, %bb.2
40+
41+
bb.1:
42+
$x3 = LI8 102
43+
BLR8 implicit $lr8, implicit $rm, implicit $x3
44+
45+
bb.2:
46+
$x3 = LI8 116
47+
BLR8 implicit $lr8, implicit $rm, implicit $x3
48+
...
49+
50+
---
51+
name: test3
52+
# The comparison instr has a implicit def.
53+
tracksRegLiveness: true
54+
body: |
55+
bb.0.entry:
56+
successors: %bb.1(0x30000000), %bb.2(0x50000000)
57+
liveins: $x3, $x4
58+
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
59+
renamable $cr0 = CMPDI renamable $x3, 0, implicit-def $x3
60+
; CHECK-LABEL: name: test3
61+
; CHECK: CMPDI
62+
BCC 68, killed renamable $cr0, %bb.2
63+
64+
bb.1:
65+
$x3 = LI8 102
66+
BLR8 implicit $lr8, implicit $rm, implicit $x3
67+
68+
bb.2:
69+
$x3 = LI8 116
70+
BLR8 implicit $lr8, implicit $rm, implicit $x3
71+
...
72+
73+
---
74+
name: test4
75+
# There is another use for cr0 between OR8 instr and CMPWI instr.
76+
tracksRegLiveness: true
77+
body: |
78+
bb.0.entry:
79+
successors: %bb.1(0x30000000), %bb.2(0x50000000)
80+
liveins: $x3, $x4, $cr0
81+
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
82+
renamable $cr1 = MCRF killed $cr0, implicit $x3
83+
renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1
84+
; CHECK-LABEL: name: test4
85+
; CHECK: CMPDI
86+
BCC 68, killed renamable $cr0, %bb.2
87+
88+
bb.1:
89+
$x3 = LI8 102
90+
BLR8 implicit $lr8, implicit $rm, implicit $x3
91+
92+
bb.2:
93+
$x3 = LI8 116
94+
BLR8 implicit $lr8, implicit $rm, implicit $x3
95+
...
96+
97+
---
98+
name: test5
99+
# There is another def for cr0 between OR8 instr and CMPWI instr.
100+
tracksRegLiveness: true
101+
body: |
102+
bb.0.entry:
103+
successors: %bb.1(0x30000000), %bb.2(0x50000000)
104+
liveins: $x3, $x4
105+
renamable $x3 = OR8 killed renamable $x3, renamable $x4
106+
renamable $cr1 = CMPD renamable $x3, renamable $x4, implicit-def $cr0
107+
renamable $cr0 = CMPDI renamable $x3, 0, implicit killed $x3, implicit $cr1
108+
; CHECK-LABEL: name: test5
109+
; CHECK: CMPDI
110+
BCC 68, killed renamable $cr0, %bb.2
111+
112+
bb.1:
113+
$x3 = LI8 102
114+
BLR8 implicit $lr8, implicit $rm, implicit $x3
115+
116+
bb.2:
117+
$x3 = LI8 116
118+
BLR8 implicit $lr8, implicit $rm, implicit $x3
119+
...
120+
121+
---
122+
name: test6
123+
# The SrcReg isn't CR0.
124+
tracksRegLiveness: true
125+
body: |
126+
bb.0.entry:
127+
successors: %bb.1(0x30000000), %bb.2(0x50000000)
128+
liveins: $x3, $x4
129+
renamable $x3 = OR8 killed renamable $x3, killed renamable $x4
130+
renamable $cr1 = CMPDI renamable $x3, 0, implicit killed $x3
131+
; CHECK-LABEL: name: test6
132+
; CHECK: CMPDI
133+
BCC 68, killed renamable $cr1, %bb.2
134+
135+
bb.1:
136+
$x3 = LI8 102
137+
BLR8 implicit $lr8, implicit $rm, implicit $x3
138+
139+
bb.2:
140+
$x3 = LI8 116
141+
BLR8 implicit $lr8, implicit $rm, implicit $x3
142+
...

llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2946,10 +2946,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
29462946
; LE-P10-O0-NEXT: std r0, 16(r1)
29472947
; LE-P10-O0-NEXT: hashst r0, -8(r1)
29482948
; LE-P10-O0-NEXT: stdu r1, -64(r1)
2949-
; LE-P10-O0-NEXT: mr r4, r3
2949+
; LE-P10-O0-NEXT: mr. r4, r3
29502950
; LE-P10-O0-NEXT: std r4, 40(r1) # 8-byte Folded Spill
29512951
; LE-P10-O0-NEXT: li r3, 0
2952-
; LE-P10-O0-NEXT: cmpdi r4, 0
29532952
; LE-P10-O0-NEXT: stw r3, 48(r1) # 4-byte Folded Spill
29542953
; LE-P10-O0-NEXT: beq cr0, .LBB2_2
29552954
; LE-P10-O0-NEXT: # %bb.1: # %if.end
@@ -2979,10 +2978,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
29792978
; LE-P9-O0-NEXT: std r0, 16(r1)
29802979
; LE-P9-O0-NEXT: hashst r0, -8(r1)
29812980
; LE-P9-O0-NEXT: stdu r1, -128(r1)
2982-
; LE-P9-O0-NEXT: mr r4, r3
2981+
; LE-P9-O0-NEXT: mr. r4, r3
29832982
; LE-P9-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill
29842983
; LE-P9-O0-NEXT: li r3, 0
2985-
; LE-P9-O0-NEXT: cmpdi r4, 0
29862984
; LE-P9-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill
29872985
; LE-P9-O0-NEXT: beq cr0, .LBB2_2
29882986
; LE-P9-O0-NEXT: # %bb.1: # %if.end
@@ -3012,10 +3010,9 @@ define dso_local zeroext i32 @shrinkwrap(ptr readonly %in) #0 {
30123010
; LE-P8-O0-NEXT: std r0, 16(r1)
30133011
; LE-P8-O0-NEXT: hashst r0, -8(r1)
30143012
; LE-P8-O0-NEXT: stdu r1, -128(r1)
3015-
; LE-P8-O0-NEXT: mr r4, r3
3013+
; LE-P8-O0-NEXT: mr. r4, r3
30163014
; LE-P8-O0-NEXT: std r4, 104(r1) # 8-byte Folded Spill
30173015
; LE-P8-O0-NEXT: li r3, 0
3018-
; LE-P8-O0-NEXT: cmpdi r4, 0
30193016
; LE-P8-O0-NEXT: stw r3, 112(r1) # 4-byte Folded Spill
30203017
; LE-P8-O0-NEXT: beq cr0, .LBB2_2
30213018
; LE-P8-O0-NEXT: # %bb.1: # %if.end

0 commit comments

Comments
 (0)