Skip to content

Commit 2c4f078

Browse files
committed
[X86] Support LEA64_32r in processInstrForSlow3OpLEA and use INC/DEC when possible.
Move the erasing and iterator updating inside to match the other slow LEA function. I've adapted code from optTwoAddrLEA and basically rebuilt the implementation here. We do lose the kill flags now, just like optTwoAddrLEA. This runs late enough in the pipeline that it shouldn't really be a problem. llvm-svn: 373877
1 parent 6088f84 commit 2c4f078

File tree

4 files changed

+125
-95
lines changed

4 files changed

+125
-95
lines changed

llvm/lib/Target/X86/X86FixupLEAs.cpp

Lines changed: 110 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ class FixupLEAPass : public MachineFunctionPass {
6767
/// - LEA that uses RIP relative addressing mode
6868
/// - LEA that uses 16-bit addressing mode "
6969
/// This function currently handles the first 2 cases only.
70-
MachineInstr *processInstrForSlow3OpLEA(MachineInstr &MI,
71-
MachineBasicBlock &MBB);
70+
void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
71+
MachineBasicBlock &MBB, bool OptIncDec);
7272

7373
/// Look for LEAs that are really two address LEAs that we might be able to
7474
/// turn into regular ADD instructions.
@@ -216,14 +216,10 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
216216
if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
217217
continue;
218218

219-
if (IsSlowLEA) {
219+
if (IsSlowLEA)
220220
processInstructionForSlowLEA(I, MBB);
221-
} else if (IsSlow3OpsLEA) {
222-
if (auto *NewMI = processInstrForSlow3OpLEA(*I, MBB)) {
223-
MBB.erase(I);
224-
I = NewMI;
225-
}
226-
}
221+
else if (IsSlow3OpsLEA)
222+
processInstrForSlow3OpLEA(I, MBB, OptIncDec);
227223
}
228224

229225
// Second pass for creating LEAs. This may reverse some of the
@@ -301,18 +297,14 @@ static inline bool isInefficientLEAReg(unsigned Reg) {
301297
Reg == X86::R13D || Reg == X86::R13;
302298
}
303299

304-
static inline bool isRegOperand(const MachineOperand &Op) {
305-
return Op.isReg() && Op.getReg() != X86::NoRegister;
306-
}
307-
308300
/// Returns true if this LEA uses base an index registers, and the base register
309301
/// is known to be inefficient for the subtarget.
310302
// TODO: use a variant scheduling class to model the latency profile
311303
// of LEA instructions, and implement this logic as a scheduling predicate.
312304
static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
313305
const MachineOperand &Index) {
314-
return Base.isReg() && isInefficientLEAReg(Base.getReg()) &&
315-
isRegOperand(Index);
306+
return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
307+
Index.getReg() != X86::NoRegister;
316308
}
317309

318310
static inline bool hasLEAOffset(const MachineOperand &Offset) {
@@ -534,112 +526,150 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
534526
}
535527
}
536528

537-
MachineInstr *
538-
FixupLEAPass::processInstrForSlow3OpLEA(MachineInstr &MI,
539-
MachineBasicBlock &MBB) {
529+
void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
530+
MachineBasicBlock &MBB,
531+
bool OptIncDec) {
532+
MachineInstr &MI = *I;
540533
const unsigned LEAOpcode = MI.getOpcode();
541534

542-
const MachineOperand &Dst = MI.getOperand(0);
535+
const MachineOperand &Dest = MI.getOperand(0);
543536
const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);
544537
const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);
545538
const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);
546539
const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);
547540
const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
548541

549-
if (!(TII->isThreeOperandsLEA(MI) ||
550-
hasInefficientLEABaseReg(Base, Index)) ||
542+
if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
551543
!TII->isSafeToClobberEFLAGS(MBB, MI) ||
552544
Segment.getReg() != X86::NoRegister)
553-
return nullptr;
545+
return;
546+
547+
Register DestReg = Dest.getReg();
548+
Register BaseReg = Base.getReg();
549+
Register IndexReg = Index.getReg();
550+
551+
if (MI.getOpcode() == X86::LEA64_32r) {
552+
if (BaseReg != 0)
553+
BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
554+
if (IndexReg != 0)
555+
IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
556+
}
554557

555-
Register DstR = Dst.getReg();
556-
Register BaseR = Base.getReg();
557-
Register IndexR = Index.getReg();
558-
Register SSDstR =
559-
(LEAOpcode == X86::LEA64_32r) ? Register(getX86SubSuperRegister(DstR, 64))
560-
: DstR;
561558
bool IsScale1 = Scale.getImm() == 1;
562-
bool IsInefficientBase = isInefficientLEAReg(BaseR);
563-
bool IsInefficientIndex = isInefficientLEAReg(IndexR);
559+
bool IsInefficientBase = isInefficientLEAReg(BaseReg);
560+
bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
564561

565562
// Skip these cases since it takes more than 2 instructions
566563
// to replace the LEA instruction.
567-
if (IsInefficientBase && SSDstR == BaseR && !IsScale1)
568-
return nullptr;
569-
if (LEAOpcode == X86::LEA64_32r && IsInefficientBase &&
570-
(IsInefficientIndex || !IsScale1))
571-
return nullptr;
572-
573-
const DebugLoc DL = MI.getDebugLoc();
574-
const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(LEAOpcode));
575-
const MCInstrDesc &ADDri = TII->get(getADDriFromLEA(LEAOpcode, Offset));
564+
if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
565+
return;
576566

577567
LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
578568
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
579569

570+
MachineInstr *NewMI = nullptr;
571+
580572
// First try to replace LEA with one or two (for the 3-op LEA case)
581573
// add instructions:
582574
// 1.lea (%base,%index,1), %base => add %index,%base
583575
// 2.lea (%base,%index,1), %index => add %base,%index
584-
if (IsScale1 && (DstR == BaseR || DstR == IndexR)) {
585-
const MachineOperand &Src = DstR == BaseR ? Index : Base;
586-
MachineInstr *NewMI =
587-
BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Src);
588-
LLVM_DEBUG(NewMI->dump(););
589-
// Create ADD instruction for the Offset in case of 3-Ops LEA.
590-
if (hasLEAOffset(Offset)) {
591-
NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
592-
LLVM_DEBUG(NewMI->dump(););
576+
if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
577+
unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
578+
if (DestReg != BaseReg)
579+
std::swap(BaseReg, IndexReg);
580+
581+
if (MI.getOpcode() == X86::LEA64_32r) {
582+
// TODO: Do we need the super register implicit use?
583+
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
584+
.addReg(BaseReg)
585+
.addReg(IndexReg)
586+
.addReg(Base.getReg(), RegState::Implicit)
587+
.addReg(Index.getReg(), RegState::Implicit);
588+
} else {
589+
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
590+
.addReg(BaseReg)
591+
.addReg(IndexReg);
593592
}
594-
return NewMI;
595-
}
596-
// If the base is inefficient try switching the index and base operands,
597-
// otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
598-
// lea offset(%base,%index,scale),%dst =>
599-
// lea (%base,%index,scale); add offset,%dst
600-
if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
601-
MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
602-
.add(Dst)
603-
.add(IsInefficientBase ? Index : Base)
604-
.add(Scale)
605-
.add(IsInefficientBase ? Base : Index)
606-
.addImm(0)
607-
.add(Segment);
593+
} else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
594+
// If the base is inefficient try switching the index and base operands,
595+
// otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
596+
// lea offset(%base,%index,scale),%dst =>
597+
// lea (%base,%index,scale); add offset,%dst
598+
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
599+
.add(Dest)
600+
.add(IsInefficientBase ? Index : Base)
601+
.add(Scale)
602+
.add(IsInefficientBase ? Base : Index)
603+
.addImm(0)
604+
.add(Segment);
608605
LLVM_DEBUG(NewMI->dump(););
606+
}
607+
608+
// If either replacement succeeded above, add the offset if needed, then
609+
// replace the instruction.
610+
if (NewMI) {
609611
// Create ADD instruction for the Offset in case of 3-Ops LEA.
610612
if (hasLEAOffset(Offset)) {
611-
NewMI = BuildMI(MBB, MI, DL, ADDri, DstR).addReg(DstR).add(Offset);
612-
LLVM_DEBUG(NewMI->dump(););
613+
if (OptIncDec && Offset.isImm() &&
614+
(Offset.getImm() == 1 || Offset.getImm() == -1)) {
615+
unsigned NewOpc =
616+
getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
617+
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
618+
.addReg(DestReg);
619+
LLVM_DEBUG(NewMI->dump(););
620+
} else {
621+
unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
622+
NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
623+
.addReg(DestReg)
624+
.add(Offset);
625+
LLVM_DEBUG(NewMI->dump(););
626+
}
613627
}
614-
return NewMI;
628+
629+
MBB.erase(I);
630+
I = NewMI;
631+
return;
615632
}
633+
616634
// Handle the rest of the cases with inefficient base register:
617-
assert(SSDstR != BaseR && "SSDstR == BaseR should be handled already!");
635+
assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
618636
assert(IsInefficientBase && "efficient base should be handled already!");
619637

638+
// FIXME: Handle LEA64_32r.
639+
if (LEAOpcode == X86::LEA64_32r)
640+
return;
641+
620642
// lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
621643
if (IsScale1 && !hasLEAOffset(Offset)) {
622-
bool BIK = Base.isKill() && BaseR != IndexR;
623-
TII->copyPhysReg(MBB, MI, DL, DstR, BaseR, BIK);
644+
bool BIK = Base.isKill() && BaseReg != IndexReg;
645+
TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
624646
LLVM_DEBUG(MI.getPrevNode()->dump(););
625647

626-
MachineInstr *NewMI =
627-
BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Index);
648+
unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
649+
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
650+
.addReg(DestReg)
651+
.add(Index);
628652
LLVM_DEBUG(NewMI->dump(););
629-
return NewMI;
653+
return;
630654
}
655+
631656
// lea offset(%base,%index,scale), %dst =>
632657
// lea offset( ,%index,scale), %dst; add %base,%dst
633-
MachineInstr *NewMI = BuildMI(MBB, MI, DL, TII->get(LEAOpcode))
634-
.add(Dst)
635-
.addReg(0)
636-
.add(Scale)
637-
.add(Index)
638-
.add(Offset)
639-
.add(Segment);
658+
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
659+
.add(Dest)
660+
.addReg(0)
661+
.add(Scale)
662+
.add(Index)
663+
.add(Offset)
664+
.add(Segment);
640665
LLVM_DEBUG(NewMI->dump(););
641666

642-
NewMI = BuildMI(MBB, MI, DL, ADDrr, DstR).addReg(DstR).add(Base);
667+
unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
668+
NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
669+
.addReg(DestReg)
670+
.add(Base);
643671
LLVM_DEBUG(NewMI->dump(););
644-
return NewMI;
672+
673+
MBB.erase(I);
674+
I = NewMI;
645675
}

llvm/test/CodeGen/X86/leaFixup32.mir

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ frameInfo:
104104
body: |
105105
bb.0 (%ir-block.0):
106106
liveins: $eax, $ebp
107-
; CHECK: $eax = ADD32rr $eax, killed $ebp
107+
; CHECK: $eax = ADD32rr $eax, $ebp
108108
; CHECK: $eax = ADD32ri8 $eax, -5
109109
110110
$eax = LEA32r killed $eax, 1, killed $ebp, -5, $noreg
@@ -139,7 +139,7 @@ frameInfo:
139139
body: |
140140
bb.0 (%ir-block.0):
141141
liveins: $eax, $ebp
142-
; CHECK: $ebp = ADD32rr $ebp, killed $eax
142+
; CHECK: $ebp = ADD32rr $ebp, $eax
143143
; CHECK: $ebp = ADD32ri8 $ebp, -5
144144
145145
$ebp = LEA32r killed $ebp, 1, killed $eax, -5, $noreg
@@ -315,7 +315,7 @@ frameInfo:
315315
body: |
316316
bb.0 (%ir-block.0):
317317
liveins: $eax, $ebp
318-
; CHECK: $eax = ADD32rr $eax, killed $ebp
318+
; CHECK: $eax = ADD32rr $eax, $ebp
319319
; CHECK: $eax = ADD32ri $eax, 129
320320
321321
$eax = LEA32r killed $eax, 1, killed $ebp, 129, $noreg

llvm/test/CodeGen/X86/leaFixup64.mir

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,8 @@ frameInfo:
177177
body: |
178178
bb.0 (%ir-block.0):
179179
liveins: $rax, $rbp
180-
; CHECK: $eax = LEA64_32r killed $rax, 1, killed $rbp, 0
181-
; CHECK: $eax = ADD32ri8 $eax, -5
180+
; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags, implicit $rax, implicit $rbp
181+
; CHECK: $eax = ADD32ri8 $eax, -5, implicit-def $eflags
182182
183183
$eax = LEA64_32r killed $rax, 1, killed $rbp, -5, $noreg
184184
RETQ $eax
@@ -212,8 +212,8 @@ frameInfo:
212212
body: |
213213
bb.0 (%ir-block.0):
214214
liveins: $rax, $rbp
215-
; CHECK: $ebp = LEA64_32r killed $rax, 1, killed $rbp, 0
216-
; CHECK: $ebp = ADD32ri8 $ebp, -5
215+
; CHECK: $ebp = ADD32rr $ebp, $eax, implicit-def $eflags, implicit $rbp, implicit $rax
216+
; CHECK: $ebp = ADD32ri8 $ebp, -5, implicit-def $eflags
217217
218218
$ebp = LEA64_32r killed $rbp, 1, killed $rax, -5, $noreg
219219
RETQ $ebp
@@ -281,7 +281,7 @@ frameInfo:
281281
body: |
282282
bb.0 (%ir-block.0):
283283
liveins: $rax, $rbp
284-
; CHECK: $rax = ADD64rr $rax, killed $rbp
284+
; CHECK: $rax = ADD64rr $rax, $rbp
285285
; CHECK: $rax = ADD64ri8 $rax, -5
286286
287287
$rax = LEA64r killed $rax, 1, killed $rbp, -5, $noreg
@@ -316,7 +316,7 @@ frameInfo:
316316
body: |
317317
bb.0 (%ir-block.0):
318318
liveins: $rax, $rbp
319-
; CHECK: $rbp = ADD64rr $rbp, killed $rax
319+
; CHECK: $rbp = ADD64rr $rbp, $rax
320320
; CHECK: $rbp = ADD64ri8 $rbp, -5
321321
322322
$rbp = LEA64r killed $rbp, 1, killed $rax, -5, $noreg
@@ -635,8 +635,8 @@ frameInfo:
635635
body: |
636636
bb.0 (%ir-block.0):
637637
liveins: $rax, $rbp
638-
; CHECK: $eax = LEA64_32r killed $rax, 1, killed $rbp, 0
639-
; CHECK: $eax = ADD32ri $eax, 129
638+
; CHECK: $eax = ADD32rr $eax, $ebp, implicit-def $eflags
639+
; CHECK: $eax = ADD32ri $eax, 129, implicit-def $eflags
640640
641641
$eax = LEA64_32r killed $rax, 1, killed $rbp, 129, $noreg
642642
RETQ $eax
@@ -772,8 +772,8 @@ frameInfo:
772772
body: |
773773
bb.0 (%ir-block.0):
774774
liveins: $rax, $rbp
775-
; CHECK: $rax = ADD64rr $rax, killed $rbp
776-
; CHECK: $rax = ADD64ri32 $rax, 129
775+
; CHECK: $rax = ADD64rr $rax, $rbp, implicit-def $eflags
776+
; CHECK: $rax = ADD64ri32 $rax, 129, implicit-def $eflags
777777
778778
$rax = LEA64r killed $rax, 1, killed $rbp, 129, $noreg
779779
RETQ $eax

llvm/test/CodeGen/X86/select-1-or-neg1.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ define i32 @PR28968(i32 %x) {
1919
; SLOWLEA3-NEXT: xorl %eax, %eax
2020
; SLOWLEA3-NEXT: cmpl $1, %edi
2121
; SLOWLEA3-NEXT: sete %al
22-
; SLOWLEA3-NEXT: leal (%rax,%rax), %eax
23-
; SLOWLEA3-NEXT: addl $-1, %eax
22+
; SLOWLEA3-NEXT: addl %eax, %eax
23+
; SLOWLEA3-NEXT: decl %eax
2424
; SLOWLEA3-NEXT: retq
2525
%cmp = icmp eq i32 %x, 1
2626
%sel = select i1 %cmp, i32 1, i32 -1

0 commit comments

Comments
 (0)