@@ -298,6 +298,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
298298 case AArch64::STRXui:
299299 case AArch64::STRXpre:
300300 case AArch64::STURXi:
301+ case AArch64::STR_ZXI:
301302 case AArch64::LDRDui:
302303 case AArch64::LDURDi:
303304 case AArch64::LDRDpre:
@@ -316,6 +317,7 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
316317 case AArch64::LDRSui:
317318 case AArch64::LDURSi:
318319 case AArch64::LDRSpre:
320+ case AArch64::LDR_ZXI:
319321 return Opc;
320322 case AArch64::LDRSWui:
321323 return AArch64::LDRWui;
@@ -361,6 +363,7 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
361363 return AArch64::STPDpre;
362364 case AArch64::STRQui:
363365 case AArch64::STURQi:
366+ case AArch64::STR_ZXI:
364367 return AArch64::STPQi;
365368 case AArch64::STRQpre:
366369 return AArch64::STPQpre;
@@ -386,6 +389,7 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
386389 return AArch64::LDPDpre;
387390 case AArch64::LDRQui:
388391 case AArch64::LDURQi:
392+ case AArch64::LDR_ZXI:
389393 return AArch64::LDPQi;
390394 case AArch64::LDRQpre:
391395 return AArch64::LDPQpre;
@@ -1225,6 +1229,16 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
12251229 (void )MIBSXTW;
12261230 LLVM_DEBUG (dbgs () << " Extend operand:\n " );
12271231 LLVM_DEBUG (((MachineInstr *)MIBSXTW)->print (dbgs ()));
1232+ } else if (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI) {
1233+ // We are combining SVE fill/spill to LDP/STP, so we need to use the Q
1234+ // variant of the registers.
1235+ MachineOperand &MOp0 = MIB->getOperand (0 );
1236+ MachineOperand &MOp1 = MIB->getOperand (1 );
1237+ assert (AArch64::ZPRRegClass.contains (MOp0.getReg ()) &&
1238+ AArch64::ZPRRegClass.contains (MOp1.getReg ()) && " Invalid register." );
1239+ MOp0.setReg (AArch64::Q0 + (MOp0.getReg () - AArch64::Z0));
1240+ MOp1.setReg (AArch64::Q0 + (MOp1.getReg () - AArch64::Z0));
1241+ LLVM_DEBUG (((MachineInstr *)MIB)->print (dbgs ()));
12281242 } else {
12291243 LLVM_DEBUG (((MachineInstr *)MIB)->print (dbgs ()));
12301244 }
@@ -1499,6 +1513,12 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
14991513 if (OpcA == OpcB)
15001514 return !AArch64InstrInfo::isPreLdSt (FirstMI);
15011515
1516+ // Bail out if one of the opcodes is SVE fill/spill, as we currently don't
1517+ // allow pairing them with other instructions.
1518+ if (OpcA == AArch64::LDR_ZXI || OpcA == AArch64::STR_ZXI ||
1519+ OpcB == AArch64::LDR_ZXI || OpcB == AArch64::STR_ZXI)
1520+ return false ;
1521+
15021522 // Two pre ld/st of different opcodes cannot be merged either
15031523 if (AArch64InstrInfo::isPreLdSt (FirstMI) && AArch64InstrInfo::isPreLdSt (MI))
15041524 return false ;
@@ -2659,7 +2679,8 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
26592679 // Get the needed alignments to check them if
26602680 // ldp-aligned-only/stp-aligned-only features are opted.
26612681 uint64_t MemAlignment = MemOp->getAlign ().value ();
2662- uint64_t TypeAlignment = Align (MemOp->getSize ().getValue ()).value ();
2682+ uint64_t TypeAlignment =
2683+ Align (MemOp->getSize ().getValue ().getKnownMinValue ()).value ();
26632684
26642685 if (MemAlignment < 2 * TypeAlignment) {
26652686 NumFailedAlignmentCheck++;
@@ -2820,11 +2841,18 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
28202841 }
28212842 // 3) Find loads and stores that can be merged into a single load or store
28222843 // pair instruction.
2844+ // When the SVE vector length is known to be 128 bits, also try to combine
2845+ // SVE fill/spill instructions into LDP/STP.
28232846 // e.g.,
28242847 // ldr x0, [x2]
28252848 // ldr x1, [x2, #8]
28262849 // ; becomes
28272850 // ldp x0, x1, [x2]
2851+ // e.g.,
2852+ // ldr z0, [x2]
2853+ // ldr z1, [x2, #1, mul vl]
2854+ // ; becomes
2855+ // ldp q0, q1, [x2]
28282856
28292857 if (MBB.getParent ()->getRegInfo ().tracksLiveness ()) {
28302858 DefinedInBB.clear ();
0 commit comments