@@ -1745,8 +1745,24 @@ static unsigned sForm(MachineInstr &Instr) {
17451745 return AArch64::SBCSXr;
17461746 case AArch64::ANDWri:
17471747 return AArch64::ANDSWri;
1748+ case AArch64::ANDWrr:
1749+ return AArch64::ANDSWrr;
1750+ case AArch64::ANDWrs:
1751+ return AArch64::ANDSWrs;
1752+ case AArch64::BICWrr:
1753+ return AArch64::BICSWrr;
1754+ case AArch64::BICWrs:
1755+ return AArch64::BICSWrs;
17481756 case AArch64::ANDXri:
17491757 return AArch64::ANDSXri;
1758+ case AArch64::ANDXrr:
1759+ return AArch64::ANDSXrr;
1760+ case AArch64::ANDXrs:
1761+ return AArch64::ANDSXrs;
1762+ case AArch64::BICXrr:
1763+ return AArch64::BICSXrr;
1764+ case AArch64::BICXrs:
1765+ return AArch64::BICSXrs;
17501766 }
17511767}
17521768
@@ -1884,6 +1900,24 @@ static bool isSUBSRegImm(unsigned Opcode) {
18841900 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
18851901}
18861902
1903+ static bool isANDSOpcode (MachineInstr &MI) {
1904+ switch (sForm (MI)) {
1905+ case AArch64::ANDSWri:
1906+ case AArch64::ANDSWrr:
1907+ case AArch64::ANDSWrs:
1908+ case AArch64::ANDSXri:
1909+ case AArch64::ANDSXrr:
1910+ case AArch64::ANDSXrs:
1911+ case AArch64::BICSWrr:
1912+ case AArch64::BICSWrs:
1913+ case AArch64::BICSXrr:
1914+ case AArch64::BICSXrs:
1915+ return true ;
1916+ default :
1917+ return false ;
1918+ }
1919+ }
1920+
18871921// / Check if CmpInstr can be substituted by MI.
18881922// /
18891923// / CmpInstr can be substituted:
@@ -1892,11 +1926,11 @@ static bool isSUBSRegImm(unsigned Opcode) {
18921926// / - and, condition flags are not alive in successors of the CmpInstr parent
18931927// / - and, if MI opcode is the S form there must be no defs of flags between
18941928// / MI and CmpInstr
1895- // / or if MI opcode is not the S form there must be neither defs of flags
1896- // / nor uses of flags between MI and CmpInstr.
1929+ // / or if MI opcode is not the S form there must be neither defs of
1930+ // / flags nor uses of flags between MI and CmpInstr.
18971931// / - and, if C/V flags are not used after CmpInstr
1898- // / or if N flag is used but MI produces poison value if signed overflow
1899- // / occurs.
1932+ // / or if N flag is used but MI produces poison value if signed
1933+ // / overflow occurs.
19001934static bool canInstrSubstituteCmpInstr (MachineInstr &MI, MachineInstr &CmpInstr,
19011935 const TargetRegisterInfo &TRI) {
19021936 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
@@ -1912,7 +1946,17 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
19121946 " Caller guarantees that CmpInstr compares with constant 0" );
19131947
19141948 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse (MI, CmpInstr, TRI);
1915- if (!NZVCUsed || NZVCUsed->C )
1949+ if (!NZVCUsed)
1950+ return false ;
1951+
1952+ // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1953+ // '%vreg = add ...' or '%vreg = sub ...'.
1954+ // Condition flag C is used to indicate unsigned overflow.
1955+ // 1) 'ADDS x, 0' always sets C to 0 ('SUBS x, 0' always sets C to 1).
1956+ // 2) NOTE(review): the S form of an add/sub MI sets C to its own carry, so
1957+ // this only matches when MI clears C too (e.g. ANDS/BICS) - confirm. In
1958+ // practice an unsigned comparison with 0 should already be optimized out.
1959+ if (NZVCUsed->C && !isADDSRegImm (CmpOpcode))
19161960 return false ;
19171961
19181962 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
@@ -1921,7 +1965,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
19211965 // 1) MI and CmpInstr set N and V to the same value.
19221966 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
19231967 // signed overflow occurs, so CmpInstr could still be simplified away.
1924- if (NZVCUsed->V && !MI.getFlag (MachineInstr::NoSWrap))
1968+ // 3) ANDS also always sets V to 0.
1969+ if (NZVCUsed->V && !MI.getFlag (MachineInstr::NoSWrap) && !isANDSOpcode (MI))
19251970 return false ;
19261971
19271972 AccessKind AccessToCheck = AK_Write;
@@ -2099,8 +2144,7 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
20992144
21002145 if (MI.getOpcode () == AArch64::CATCHRET) {
21012146 // Skip to the first instruction before the epilog.
2102- const TargetInstrInfo *TII =
2103- MBB.getParent ()->getSubtarget ().getInstrInfo ();
2147+ const TargetInstrInfo *TII = MBB.getParent ()->getSubtarget ().getInstrInfo ();
21042148 MachineBasicBlock *TargetMBB = MI.getOperand (0 ).getMBB ();
21052149 auto MBBI = MachineBasicBlock::iterator (MI);
21062150 MachineBasicBlock::iterator FirstEpilogSEH = std::prev (MBBI);
@@ -2168,16 +2212,16 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
21682212 .addUse (Reg, RegState::Kill)
21692213 .addImm (0 );
21702214 } else {
2171- // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2172- // than 23760.
2173- // It might be nice to use AArch64::MOVi32imm here, which would get
2174- // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2175- // contains the MRS result. findScratchNonCalleeSaveRegister() in
2176- // AArch64FrameLowering might help us find such a scratch register
2177- // though. If we failed to find a scratch register, we could emit a
2178- // stream of add instructions to build up the immediate. Or, we could try
2179- // to insert a AArch64::MOVi32imm before register allocation so that we
2180- // didn't need to scavenge for a scratch register.
2215+ // Cases that are larger than +/- 4095 and not a multiple of 8, or
2216+ // larger than 23760. It might be nice to use AArch64::MOVi32imm here,
2217+ // which would get expanded in PreSched2 after PostRA, but our lone
2218+ // scratch Reg already contains the MRS result.
2219+ // findScratchNonCalleeSaveRegister() in AArch64FrameLowering might help
2220+ // us find such a scratch register though. If we failed to find a
2221+ // scratch register, we could emit a stream of add instructions to build
2222+ // up the immediate. Or, we could try to insert an AArch64::MOVi32imm
2223+ // before register allocation so that we didn't need to scavenge for a
2224+ // scratch register.
21812225 report_fatal_error (" Unable to encode Stack Protector Guard Offset" );
21822226 }
21832227 MBB.erase (MI);
@@ -2437,31 +2481,56 @@ bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
24372481
24382482std::optional<unsigned > AArch64InstrInfo::getUnscaledLdSt (unsigned Opc) {
24392483 switch (Opc) {
2440- default : return {};
2441- case AArch64::PRFMui: return AArch64::PRFUMi;
2442- case AArch64::LDRXui: return AArch64::LDURXi;
2443- case AArch64::LDRWui: return AArch64::LDURWi;
2444- case AArch64::LDRBui: return AArch64::LDURBi;
2445- case AArch64::LDRHui: return AArch64::LDURHi;
2446- case AArch64::LDRSui: return AArch64::LDURSi;
2447- case AArch64::LDRDui: return AArch64::LDURDi;
2448- case AArch64::LDRQui: return AArch64::LDURQi;
2449- case AArch64::LDRBBui: return AArch64::LDURBBi;
2450- case AArch64::LDRHHui: return AArch64::LDURHHi;
2451- case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2452- case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2453- case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2454- case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2455- case AArch64::LDRSWui: return AArch64::LDURSWi;
2456- case AArch64::STRXui: return AArch64::STURXi;
2457- case AArch64::STRWui: return AArch64::STURWi;
2458- case AArch64::STRBui: return AArch64::STURBi;
2459- case AArch64::STRHui: return AArch64::STURHi;
2460- case AArch64::STRSui: return AArch64::STURSi;
2461- case AArch64::STRDui: return AArch64::STURDi;
2462- case AArch64::STRQui: return AArch64::STURQi;
2463- case AArch64::STRBBui: return AArch64::STURBBi;
2464- case AArch64::STRHHui: return AArch64::STURHHi;
2484+ default :
2485+ return {};
2486+ case AArch64::PRFMui:
2487+ return AArch64::PRFUMi;
2488+ case AArch64::LDRXui:
2489+ return AArch64::LDURXi;
2490+ case AArch64::LDRWui:
2491+ return AArch64::LDURWi;
2492+ case AArch64::LDRBui:
2493+ return AArch64::LDURBi;
2494+ case AArch64::LDRHui:
2495+ return AArch64::LDURHi;
2496+ case AArch64::LDRSui:
2497+ return AArch64::LDURSi;
2498+ case AArch64::LDRDui:
2499+ return AArch64::LDURDi;
2500+ case AArch64::LDRQui:
2501+ return AArch64::LDURQi;
2502+ case AArch64::LDRBBui:
2503+ return AArch64::LDURBBi;
2504+ case AArch64::LDRHHui:
2505+ return AArch64::LDURHHi;
2506+ case AArch64::LDRSBXui:
2507+ return AArch64::LDURSBXi;
2508+ case AArch64::LDRSBWui:
2509+ return AArch64::LDURSBWi;
2510+ case AArch64::LDRSHXui:
2511+ return AArch64::LDURSHXi;
2512+ case AArch64::LDRSHWui:
2513+ return AArch64::LDURSHWi;
2514+ case AArch64::LDRSWui:
2515+ return AArch64::LDURSWi;
2516+ case AArch64::STRXui:
2517+ return AArch64::STURXi;
2518+ case AArch64::STRWui:
2519+ return AArch64::STURWi;
2520+ case AArch64::STRBui:
2521+ return AArch64::STURBi;
2522+ case AArch64::STRHui:
2523+ return AArch64::STURHi;
2524+ case AArch64::STRSui:
2525+ return AArch64::STURSi;
2526+ case AArch64::STRDui:
2527+ return AArch64::STURDi;
2528+ case AArch64::STRQui:
2529+ return AArch64::STURQi;
2530+ case AArch64::STRBBui:
2531+ return AArch64::STURBBi;
2532+ case AArch64::STRHHui:
2533+ return AArch64::STURHHi;
24652534 }
24662535}
24672536
@@ -2909,8 +2978,8 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
29092978 MI.getOperand (IsPreLdSt ? 2 : 1 ).isFI ()) &&
29102979 " Expected a reg or frame index operand." );
29112980
2912- // For Pre-indexed addressing quadword instructions, the third operand is the
2913- // immediate value.
2981+ // For Pre-indexed addressing quadword instructions, the third operand is
2982+ // the immediate value.
29142983 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand (3 ).isImm ();
29152984
29162985 if (!MI.getOperand (2 ).isImm () && !IsImmPreLdSt)
@@ -2951,17 +3020,18 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
29513020 return false ;
29523021
29533022 // Do not pair any callee-save store/reload instructions in the
2954- // prologue/epilogue if the CFI information encoded the operations as separate
2955- // instructions, as that will cause the size of the actual prologue to mismatch
2956- // with the prologue size recorded in the Windows CFI.
3023+ // prologue/epilogue if the CFI information encoded the operations as
3024+ // separate instructions, as that will cause the size of the actual prologue
3025+ // to mismatch with the prologue size recorded in the Windows CFI.
29573026 const MCAsmInfo *MAI = MI.getMF ()->getTarget ().getMCAsmInfo ();
29583027 bool NeedsWinCFI = MAI->usesWindowsCFI () &&
29593028 MI.getMF ()->getFunction ().needsUnwindTableEntry ();
29603029 if (NeedsWinCFI && (MI.getFlag (MachineInstr::FrameSetup) ||
29613030 MI.getFlag (MachineInstr::FrameDestroy)))
29623031 return false ;
29633032
2964- // On some CPUs quad load/store pairs are slower than two single load/stores.
3033+ // On some CPUs quad load/store pairs are slower than two single
3034+ // load/stores.
29653035 if (Subtarget.isPaired128Slow ()) {
29663036 switch (MI.getOpcode ()) {
29673037 default :
@@ -3138,8 +3208,8 @@ bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
31383208 OffsetScale = 1 ;
31393209
31403210 // If the address instruction is folded into the base register, then the
3141- // addressing mode must not have a scale. Then we can swap the base and the
3142- // scaled registers.
3211+ // addressing mode must not have a scale. Then we can swap the base and
3212+ // the scaled registers.
31433213 if (MemI.getOperand (1 ).getReg () == Reg && OffsetScale != 1 )
31443214 return false ;
31453215
@@ -3344,8 +3414,8 @@ bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
33443414}
33453415
33463416// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3347- // return the opcode of an instruction performing the same operation, but using
3348- // the [Reg, Reg] addressing mode.
3417+ // return the opcode of an instruction performing the same operation, but
3418+ // using the [Reg, Reg] addressing mode.
33493419static unsigned regOffsetOpcode (unsigned Opcode) {
33503420 switch (Opcode) {
33513421 default :
@@ -3417,9 +3487,9 @@ static unsigned regOffsetOpcode(unsigned Opcode) {
34173487 }
34183488}
34193489
3420- // Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3421- // the opcode of an instruction performing the same operation, but using the
3422- // [Reg, #Imm] addressing mode with scaled offset.
3490+ // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3491+ // return the opcode of an instruction performing the same operation, but
3492+ // using the [Reg, #Imm] addressing mode with scaled offset.
34233493unsigned scaledOffsetOpcode (unsigned Opcode, unsigned &Scale) {
34243494 switch (Opcode) {
34253495 default :
@@ -3522,9 +3592,9 @@ unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
35223592 }
35233593}
35243594
3525- // Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3526- // the opcode of an instruction performing the same operation, but using the
3527- // [Reg, #Imm] addressing mode with unscaled offset.
3595+ // Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3596+ // return the opcode of an instruction performing the same operation, but
3597+ // using the [Reg, #Imm] addressing mode with unscaled offset.
35283598unsigned unscaledOffsetOpcode (unsigned Opcode) {
35293599 switch (Opcode) {
35303600 default :
@@ -3597,10 +3667,9 @@ unsigned unscaledOffsetOpcode(unsigned Opcode) {
35973667 }
35983668}
35993669
3600- // Given the opcode of a memory load/store instruction, return the opcode of an
3601- // instruction performing the same operation, but using
3602- // the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3603- // offset register.
3670+ // Given the opcode of a memory load/store instruction, return the opcode of
3671+ // an instruction performing the same operation, but using the [Reg, Reg,
3672+ // {s,u}xtw #N] addressing mode with sign-/zero-extend of the offset register.
36043673static unsigned offsetExtendOpcode (unsigned Opcode) {
36053674 switch (Opcode) {
36063675 default :
@@ -3740,7 +3809,8 @@ MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
37403809
37413810 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
37423811 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3743- // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3812+ // The new instruction will be in the form
3813+ // `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
37443814 assert (AM.ScaledReg && !AM.Displacement &&
37453815 " Address offset can be a register or an immediate, but not both" );
37463816 unsigned Opcode = offsetExtendOpcode (MemI.getOpcode ());
@@ -4023,8 +4093,8 @@ bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
40234093 return false ;
40244094
40254095 // Compute the offset. Offset is calculated as the immediate operand
4026- // multiplied by the scaling factor. Unscaled instructions have scaling factor
4027- // set to 1. Postindex are a special case which have an offset of 0.
4096+ // multiplied by the scaling factor. Unscaled instructions have scaling
4097+ // factor set to 1. Postindex are a special case which have an offset of 0.
40284098 if (isPostIndexLdStOpcode (LdSt.getOpcode ())) {
40294099 BaseOp = &LdSt.getOperand (2 );
40304100 Offset = 0 ;
@@ -4728,8 +4798,7 @@ bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
47284798 if (Reg.isPhysical ())
47294799 return AArch64::FPR16RegClass.contains (Reg);
47304800 const TargetRegisterClass *TRC = ::getRegClass (MI, Reg);
4731- return TRC == &AArch64::FPR16RegClass ||
4732- TRC == &AArch64::FPR16_loRegClass;
4801+ return TRC == &AArch64::FPR16RegClass || TRC == &AArch64::FPR16_loRegClass;
47334802 };
47344803 return llvm::any_of (MI.operands (), IsHFPR);
47354804}
0 commit comments