@@ -67,8 +67,8 @@ class FixupLEAPass : public MachineFunctionPass {
67
67
// / - LEA that uses RIP relative addressing mode
68
68
// / - LEA that uses 16-bit addressing mode
69
69
// / This function currently handles the first 2 cases only.
70
- MachineInstr * processInstrForSlow3OpLEA (MachineInstr &MI ,
71
- MachineBasicBlock &MBB);
70
+ void processInstrForSlow3OpLEA (MachineBasicBlock::iterator &I ,
71
+ MachineBasicBlock &MBB, bool OptIncDec );
72
72
73
73
// / Look for LEAs that are really two address LEAs that we might be able to
74
74
// / turn into regular ADD instructions.
@@ -216,14 +216,10 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
216
216
if (optTwoAddrLEA (I, MBB, OptIncDec, UseLEAForSP))
217
217
continue ;
218
218
219
- if (IsSlowLEA) {
219
+ if (IsSlowLEA)
220
220
processInstructionForSlowLEA (I, MBB);
221
- } else if (IsSlow3OpsLEA) {
222
- if (auto *NewMI = processInstrForSlow3OpLEA (*I, MBB)) {
223
- MBB.erase (I);
224
- I = NewMI;
225
- }
226
- }
221
+ else if (IsSlow3OpsLEA)
222
+ processInstrForSlow3OpLEA (I, MBB, OptIncDec);
227
223
}
228
224
229
225
// Second pass for creating LEAs. This may reverse some of the
@@ -301,18 +297,14 @@ static inline bool isInefficientLEAReg(unsigned Reg) {
301
297
Reg == X86::R13D || Reg == X86::R13;
302
298
}
303
299
304
- static inline bool isRegOperand (const MachineOperand &Op) {
305
- return Op.isReg () && Op.getReg () != X86::NoRegister;
306
- }
307
-
308
300
// / Returns true if this LEA uses base an index registers, and the base register
309
301
// / is known to be inefficient for the subtarget.
310
302
// TODO: use a variant scheduling class to model the latency profile
311
303
// of LEA instructions, and implement this logic as a scheduling predicate.
312
304
static inline bool hasInefficientLEABaseReg (const MachineOperand &Base,
313
305
const MachineOperand &Index) {
314
- return Base.isReg () && isInefficientLEAReg (Base.getReg ()) &&
315
- isRegOperand ( Index) ;
306
+ return Base.isReg () && isInefficientLEAReg (Base.getReg ()) && Index. isReg () &&
307
+ Index. getReg () != X86::NoRegister ;
316
308
}
317
309
318
310
static inline bool hasLEAOffset (const MachineOperand &Offset) {
@@ -534,112 +526,150 @@ void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
534
526
}
535
527
}
536
528
537
- MachineInstr *
538
- FixupLEAPass::processInstrForSlow3OpLEA (MachineInstr &MI,
539
- MachineBasicBlock &MBB) {
529
+ void FixupLEAPass::processInstrForSlow3OpLEA (MachineBasicBlock::iterator &I,
530
+ MachineBasicBlock &MBB,
531
+ bool OptIncDec) {
532
+ MachineInstr &MI = *I;
540
533
const unsigned LEAOpcode = MI.getOpcode ();
541
534
542
- const MachineOperand &Dst = MI.getOperand (0 );
535
+ const MachineOperand &Dest = MI.getOperand (0 );
543
536
const MachineOperand &Base = MI.getOperand (1 + X86::AddrBaseReg);
544
537
const MachineOperand &Scale = MI.getOperand (1 + X86::AddrScaleAmt);
545
538
const MachineOperand &Index = MI.getOperand (1 + X86::AddrIndexReg);
546
539
const MachineOperand &Offset = MI.getOperand (1 + X86::AddrDisp);
547
540
const MachineOperand &Segment = MI.getOperand (1 + X86::AddrSegmentReg);
548
541
549
- if (!(TII->isThreeOperandsLEA (MI) ||
550
- hasInefficientLEABaseReg (Base, Index)) ||
542
+ if (!(TII->isThreeOperandsLEA (MI) || hasInefficientLEABaseReg (Base, Index)) ||
551
543
!TII->isSafeToClobberEFLAGS (MBB, MI) ||
552
544
Segment.getReg () != X86::NoRegister)
553
- return nullptr ;
545
+ return ;
546
+
547
+ Register DestReg = Dest.getReg ();
548
+ Register BaseReg = Base.getReg ();
549
+ Register IndexReg = Index.getReg ();
550
+
551
+ if (MI.getOpcode () == X86::LEA64_32r) {
552
+ if (BaseReg != 0 )
553
+ BaseReg = TRI->getSubReg (BaseReg, X86::sub_32bit);
554
+ if (IndexReg != 0 )
555
+ IndexReg = TRI->getSubReg (IndexReg, X86::sub_32bit);
556
+ }
554
557
555
- Register DstR = Dst.getReg ();
556
- Register BaseR = Base.getReg ();
557
- Register IndexR = Index.getReg ();
558
- Register SSDstR =
559
- (LEAOpcode == X86::LEA64_32r) ? Register (getX86SubSuperRegister (DstR, 64 ))
560
- : DstR;
561
558
bool IsScale1 = Scale.getImm () == 1 ;
562
- bool IsInefficientBase = isInefficientLEAReg (BaseR );
563
- bool IsInefficientIndex = isInefficientLEAReg (IndexR );
559
+ bool IsInefficientBase = isInefficientLEAReg (BaseReg );
560
+ bool IsInefficientIndex = isInefficientLEAReg (IndexReg );
564
561
565
562
// Skip these cases since it takes more than 2 instructions
566
563
// to replace the LEA instruction.
567
- if (IsInefficientBase && SSDstR == BaseR && !IsScale1)
568
- return nullptr ;
569
- if (LEAOpcode == X86::LEA64_32r && IsInefficientBase &&
570
- (IsInefficientIndex || !IsScale1))
571
- return nullptr ;
572
-
573
- const DebugLoc DL = MI.getDebugLoc ();
574
- const MCInstrDesc &ADDrr = TII->get (getADDrrFromLEA (LEAOpcode));
575
- const MCInstrDesc &ADDri = TII->get (getADDriFromLEA (LEAOpcode, Offset));
564
+ if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
565
+ return ;
576
566
577
567
LLVM_DEBUG (dbgs () << " FixLEA: Candidate to replace:" ; MI.dump (););
578
568
LLVM_DEBUG (dbgs () << " FixLEA: Replaced by: " ;);
579
569
570
+ MachineInstr *NewMI = nullptr ;
571
+
580
572
// First try to replace LEA with one or two (for the 3-op LEA case)
581
573
// add instructions:
582
574
// 1.lea (%base,%index,1), %base => add %index,%base
583
575
// 2.lea (%base,%index,1), %index => add %base,%index
584
- if (IsScale1 && (DstR == BaseR || DstR == IndexR)) {
585
- const MachineOperand &Src = DstR == BaseR ? Index : Base;
586
- MachineInstr *NewMI =
587
- BuildMI (MBB, MI, DL, ADDrr, DstR).addReg (DstR).add (Src);
588
- LLVM_DEBUG (NewMI->dump (););
589
- // Create ADD instruction for the Offset in case of 3-Ops LEA.
590
- if (hasLEAOffset (Offset)) {
591
- NewMI = BuildMI (MBB, MI, DL, ADDri, DstR).addReg (DstR).add (Offset);
592
- LLVM_DEBUG (NewMI->dump (););
576
+ if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
577
+ unsigned NewOpc = getADDrrFromLEA (MI.getOpcode ());
578
+ if (DestReg != BaseReg)
579
+ std::swap (BaseReg, IndexReg);
580
+
581
+ if (MI.getOpcode () == X86::LEA64_32r) {
582
+ // TODO: Do we need the super register implicit use?
583
+ NewMI = BuildMI (MBB, I, MI.getDebugLoc (), TII->get (NewOpc), DestReg)
584
+ .addReg (BaseReg)
585
+ .addReg (IndexReg)
586
+ .addReg (Base.getReg (), RegState::Implicit)
587
+ .addReg (Index.getReg (), RegState::Implicit);
588
+ } else {
589
+ NewMI = BuildMI (MBB, I, MI.getDebugLoc (), TII->get (NewOpc), DestReg)
590
+ .addReg (BaseReg)
591
+ .addReg (IndexReg);
593
592
}
594
- return NewMI;
595
- }
596
- // If the base is inefficient try switching the index and base operands,
597
- // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
598
- // lea offset(%base,%index,scale),%dst =>
599
- // lea (%base,%index,scale); add offset,%dst
600
- if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
601
- MachineInstr *NewMI = BuildMI (MBB, MI, DL, TII->get (LEAOpcode))
602
- .add (Dst)
603
- .add (IsInefficientBase ? Index : Base)
604
- .add (Scale)
605
- .add (IsInefficientBase ? Base : Index)
606
- .addImm (0 )
607
- .add (Segment);
593
+ } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
594
+ // If the base is inefficient try switching the index and base operands,
595
+ // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
596
+ // lea offset(%base,%index,scale),%dst =>
597
+ // lea (%base,%index,scale); add offset,%dst
598
+ NewMI = BuildMI (MBB, MI, MI.getDebugLoc (), TII->get (LEAOpcode))
599
+ .add (Dest)
600
+ .add (IsInefficientBase ? Index : Base)
601
+ .add (Scale)
602
+ .add (IsInefficientBase ? Base : Index)
603
+ .addImm (0 )
604
+ .add (Segment);
608
605
LLVM_DEBUG (NewMI->dump (););
606
+ }
607
+
608
+ // If either replacement succeeded above, add the offset if needed, then
609
+ // replace the instruction.
610
+ if (NewMI) {
609
611
// Create ADD instruction for the Offset in case of 3-Ops LEA.
610
612
if (hasLEAOffset (Offset)) {
611
- NewMI = BuildMI (MBB, MI, DL, ADDri, DstR).addReg (DstR).add (Offset);
612
- LLVM_DEBUG (NewMI->dump (););
613
+ if (OptIncDec && Offset.isImm () &&
614
+ (Offset.getImm () == 1 || Offset.getImm () == -1 )) {
615
+ unsigned NewOpc =
616
+ getINCDECFromLEA (MI.getOpcode (), Offset.getImm () == 1 );
617
+ NewMI = BuildMI (MBB, I, MI.getDebugLoc (), TII->get (NewOpc), DestReg)
618
+ .addReg (DestReg);
619
+ LLVM_DEBUG (NewMI->dump (););
620
+ } else {
621
+ unsigned NewOpc = getADDriFromLEA (MI.getOpcode (), Offset);
622
+ NewMI = BuildMI (MBB, I, MI.getDebugLoc (), TII->get (NewOpc), DestReg)
623
+ .addReg (DestReg)
624
+ .add (Offset);
625
+ LLVM_DEBUG (NewMI->dump (););
626
+ }
613
627
}
614
- return NewMI;
628
+
629
+ MBB.erase (I);
630
+ I = NewMI;
631
+ return ;
615
632
}
633
+
616
634
// Handle the rest of the cases with inefficient base register:
617
- assert (SSDstR != BaseR && " SSDstR == BaseR should be handled already!" );
635
+ assert (DestReg != BaseReg && " DestReg == BaseReg should be handled already!" );
618
636
assert (IsInefficientBase && " efficient base should be handled already!" );
619
637
638
+ // FIXME: Handle LEA64_32r.
639
+ if (LEAOpcode == X86::LEA64_32r)
640
+ return ;
641
+
620
642
// lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
621
643
if (IsScale1 && !hasLEAOffset (Offset)) {
622
- bool BIK = Base.isKill () && BaseR != IndexR ;
623
- TII->copyPhysReg (MBB, MI, DL, DstR, BaseR , BIK);
644
+ bool BIK = Base.isKill () && BaseReg != IndexReg ;
645
+ TII->copyPhysReg (MBB, MI, MI. getDebugLoc (), DestReg, BaseReg , BIK);
624
646
LLVM_DEBUG (MI.getPrevNode ()->dump (););
625
647
626
- MachineInstr *NewMI =
627
- BuildMI (MBB, MI, DL, ADDrr, DstR).addReg (DstR).add (Index);
648
+ unsigned NewOpc = getADDrrFromLEA (MI.getOpcode ());
649
+ NewMI = BuildMI (MBB, MI, MI.getDebugLoc (), TII->get (NewOpc), DestReg)
650
+ .addReg (DestReg)
651
+ .add (Index);
628
652
LLVM_DEBUG (NewMI->dump (););
629
- return NewMI ;
653
+ return ;
630
654
}
655
+
631
656
// lea offset(%base,%index,scale), %dst =>
632
657
// lea offset( ,%index,scale), %dst; add %base,%dst
633
- MachineInstr * NewMI = BuildMI (MBB, MI, DL , TII->get (LEAOpcode))
634
- .add (Dst )
635
- .addReg (0 )
636
- .add (Scale)
637
- .add (Index)
638
- .add (Offset)
639
- .add (Segment);
658
+ NewMI = BuildMI (MBB, MI, MI. getDebugLoc () , TII->get (LEAOpcode))
659
+ .add (Dest )
660
+ .addReg (0 )
661
+ .add (Scale)
662
+ .add (Index)
663
+ .add (Offset)
664
+ .add (Segment);
640
665
LLVM_DEBUG (NewMI->dump (););
641
666
642
- NewMI = BuildMI (MBB, MI, DL, ADDrr, DstR).addReg (DstR).add (Base);
667
+ unsigned NewOpc = getADDrrFromLEA (MI.getOpcode ());
668
+ NewMI = BuildMI (MBB, MI, MI.getDebugLoc (), TII->get (NewOpc), DestReg)
669
+ .addReg (DestReg)
670
+ .add (Base);
643
671
LLVM_DEBUG (NewMI->dump (););
644
- return NewMI;
672
+
673
+ MBB.erase (I);
674
+ I = NewMI;
645
675
}
0 commit comments