|
64 | 64 | // 8. Remove redundant CSELs that select between identical registers, by |
65 | 65 | // replacing them with unconditional moves. |
66 | 66 | // |
| 67 | +// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32 bit |
| 68 | +// LSR or LSL alias of UBFM. |
| 69 | +// |
67 | 70 | //===----------------------------------------------------------------------===// |
68 | 71 |
|
69 | 72 | #include "AArch64ExpandImm.h" |
@@ -132,6 +135,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass { |
132 | 135 | bool visitINSviGPR(MachineInstr &MI, unsigned Opc); |
133 | 136 | bool visitINSvi64lane(MachineInstr &MI); |
134 | 137 | bool visitFMOVDr(MachineInstr &MI); |
| 138 | + bool visitUBFMXri(MachineInstr &MI); |
135 | 139 | bool visitCopy(MachineInstr &MI); |
136 | 140 | bool runOnMachineFunction(MachineFunction &MF) override; |
137 | 141 |
|
@@ -715,6 +719,57 @@ bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) { |
715 | 719 | return true; |
716 | 720 | } |
717 | 721 |
|
| 722 | +bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) { |
| 723 | + // Check if the instruction is equivalent to a 32 bit LSR or LSL alias of |
| 724 | + // UBFM, and replace the UBFMXri instruction with its 32 bit variant, UBFMWri. |
| 725 | + int64_t Immr = MI.getOperand(2).getImm(); |
| 726 | + int64_t Imms = MI.getOperand(3).getImm(); |
| 727 | + |
| 728 | + bool IsLSR = Imms == 31 && Immr <= Imms; |
| 729 | + bool IsLSL = Immr == Imms + 33; |
| 730 | + if (!IsLSR && !IsLSL) |
| 731 | + return false; |
| 732 | + |
| 733 | + if (IsLSL) { |
| 734 | + Immr -= 32; |
| 735 | + } |
| 736 | + |
| 737 | + const TargetRegisterClass *DstRC64 = |
| 738 | + TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI, *MI.getMF()); |
| 739 | + const TargetRegisterClass *DstRC32 = |
| 740 | + TRI->getSubRegisterClass(DstRC64, AArch64::sub_32); |
| 741 | + assert(DstRC32 && "Destination register class of UBFMXri doesn't have a " |
| 742 | + "sub_32 subregister class"); |
| 743 | + |
| 744 | + const TargetRegisterClass *SrcRC64 = |
| 745 | + TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI, *MI.getMF()); |
| 746 | + const TargetRegisterClass *SrcRC32 = |
| 747 | + TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32); |
| 748 | + assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 " |
| 749 | + "subregister class"); |
| 750 | + |
| 751 | + Register DstReg64 = MI.getOperand(0).getReg(); |
| 752 | + Register DstReg32 = MRI->createVirtualRegister(DstRC32); |
| 753 | + Register SrcReg64 = MI.getOperand(1).getReg(); |
| 754 | + Register SrcReg32 = MRI->createVirtualRegister(SrcRC32); |
| 755 | + |
| 756 | + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::COPY), |
| 757 | + SrcReg32) |
| 758 | + .addReg(SrcReg64, 0, AArch64::sub_32); |
| 759 | + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::UBFMWri), |
| 760 | + DstReg32) |
| 761 | + .addReg(SrcReg32) |
| 762 | + .addImm(Immr) |
| 763 | + .addImm(Imms); |
| 764 | + BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), |
| 765 | + TII->get(AArch64::SUBREG_TO_REG), DstReg64) |
| 766 | + .addImm(0) |
| 767 | + .addReg(DstReg32) |
| 768 | + .addImm(AArch64::sub_32); |
| 769 | + MI.eraseFromParent(); |
| 770 | + return true; |
| 771 | +} |
| 772 | + |
718 | 773 | // Across a basic-block we might have an i32 extract from a value that only |
719 | 774 | // operates on upper bits (for example a sxtw). We can replace the COPY with a |
720 | 775 | // new version skipping the sxtw. |
@@ -865,6 +920,9 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { |
865 | 920 | case AArch64::FMOVDr: |
866 | 921 | Changed |= visitFMOVDr(MI); |
867 | 922 | break; |
| 923 | + case AArch64::UBFMXri: |
| 924 | + Changed |= visitUBFMXri(MI); |
| 925 | + break; |
868 | 926 | case AArch64::COPY: |
869 | 927 | Changed |= visitCopy(MI); |
870 | 928 | break; |
|
0 commit comments