-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AArch64][SME] Reduce ptrue count when filling p-regs from z-regs #125523
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Currently, each expansion of `FILL_PPR_FROM_ZPR_SLOT_PSEUDO` creates a new ptrue instruction. This patch adds a simple method to reuse a previous ptrue instruction when expanding back-to-back fill pseudos.
Member
|
@llvm/pr-subscribers-backend-aarch64 Author: Benjamin Maxwell (MacDue) ChangesCurrently, each expansion of Patch is 22.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/125523.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 81523adeefceed..d3abd79b85a75f 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -4175,7 +4175,10 @@ int64_t AArch64FrameLowering::assignSVEStackObjectOffsets(
/// Attempts to scavenge a register from \p ScavengeableRegs given the used
/// registers in \p UsedRegs.
static Register tryScavengeRegister(LiveRegUnits const &UsedRegs,
- BitVector const &ScavengeableRegs) {
+ BitVector const &ScavengeableRegs,
+ Register PreferredReg) {
+ if (PreferredReg != AArch64::NoRegister && UsedRegs.available(PreferredReg))
+ return PreferredReg;
for (auto Reg : ScavengeableRegs.set_bits()) {
if (UsedRegs.available(Reg))
return Reg;
@@ -4212,11 +4215,12 @@ struct ScopedScavengeOrSpill {
Register SpillCandidate, const TargetRegisterClass &RC,
LiveRegUnits const &UsedRegs,
BitVector const &AllocatableRegs,
- std::optional<int> *MaybeSpillFI)
+ std::optional<int> *MaybeSpillFI,
+ Register PreferredReg = AArch64::NoRegister)
: MBB(MBB), MBBI(MBBI), RC(RC), TII(static_cast<const AArch64InstrInfo &>(
*MF.getSubtarget().getInstrInfo())),
TRI(*MF.getSubtarget().getRegisterInfo()) {
- FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs);
+ FreeReg = tryScavengeRegister(UsedRegs, AllocatableRegs, PreferredReg);
if (FreeReg != AArch64::NoRegister)
return;
assert(MaybeSpillFI && "Expected emergency spill slot FI information "
@@ -4331,12 +4335,10 @@ static void expandSpillPPRToZPRSlotPseudo(MachineBasicBlock &MBB,
/// spilling if necessary). If the status flags are in use at the point of
/// expansion they are preserved (by moving them to/from a GPR). This may cause
/// an additional spill if no GPR is free at the expansion point.
-static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
- MachineInstr &MI,
- const TargetRegisterInfo &TRI,
- LiveRegUnits const &UsedRegs,
- ScavengeableRegs const &SR,
- EmergencyStackSlots &SpillSlots) {
+static bool expandFillPPRFromZPRSlotPseudo(
+ MachineBasicBlock &MBB, MachineInstr &MI, const TargetRegisterInfo &TRI,
+ LiveRegUnits const &UsedRegs, ScavengeableRegs const &SR,
+ MachineInstr *&LastPTrue, EmergencyStackSlots &SpillSlots) {
MachineFunction &MF = *MBB.getParent();
auto *TII =
static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -4347,7 +4349,9 @@ static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
ScopedScavengeOrSpill PredReg(
MF, MBB, MI, AArch64::P0, AArch64::PPR_3bRegClass, UsedRegs, SR.PPR3bRegs,
- isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI);
+ isInPrologueOrEpilogue(MI) ? nullptr : &SpillSlots.PPRSpillFI,
+ /*PreferredReg=*/
+ LastPTrue ? LastPTrue->getOperand(0).getReg() : AArch64::NoRegister);
// Elide NZCV spills if we know it is not used.
bool IsNZCVUsed = !UsedRegs.available(AArch64::NZCV);
@@ -4371,9 +4375,17 @@ static bool expandFillPPRFromZPRSlotPseudo(MachineBasicBlock &MBB,
.addImm(AArch64SysReg::NZCV)
.addReg(AArch64::NZCV, RegState::Implicit)
.getInstr());
- MachineInstrs.push_back(BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B))
- .addReg(*PredReg, RegState::Define)
- .addImm(31));
+
+ // Reuse previous ptrue if we know it has not been clobbered.
+ if (LastPTrue) {
+ assert(*PredReg == LastPTrue->getOperand(0).getReg());
+ LastPTrue->moveBefore(&MI);
+ } else {
+ LastPTrue = BuildMI(MBB, MI, DL, TII->get(AArch64::PTRUE_B))
+ .addReg(*PredReg, RegState::Define)
+ .addImm(31);
+ }
+ MachineInstrs.push_back(LastPTrue);
MachineInstrs.push_back(
BuildMI(MBB, MI, DL, TII->get(AArch64::CMPNE_PPzZI_B))
.addReg(MI.getOperand(0).getReg(), RegState::Define)
@@ -4402,19 +4414,24 @@ static bool expandSMEPPRToZPRSpillPseudos(MachineBasicBlock &MBB,
LiveRegUnits UsedRegs(TRI);
UsedRegs.addLiveOuts(MBB);
bool HasPPRSpills = false;
+ MachineInstr *LastPTrue = nullptr;
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
UsedRegs.stepBackward(MI);
switch (MI.getOpcode()) {
case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
+ if (LastPTrue &&
+ MI.definesRegister(LastPTrue->getOperand(0).getReg(), &TRI))
+ LastPTrue = nullptr;
HasPPRSpills |= expandFillPPRFromZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR,
- SpillSlots);
+ LastPTrue, SpillSlots);
MI.eraseFromParent();
break;
case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
expandSpillPPRToZPRSlotPseudo(MBB, MI, TRI, UsedRegs, SR, SpillSlots);
MI.eraseFromParent();
- break;
+ [[fallthrough]];
default:
+ LastPTrue = nullptr;
break;
}
}
diff --git a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
index b58f91ac68a932..bff0cacfd5190c 100644
--- a/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir
@@ -116,46 +116,34 @@ body: |
; EXPAND-NEXT: $p15 = IMPLICIT_DEF
;
; EXPAND-NEXT: $z0 = LDR_ZXI killed $x8, 0 :: (load (s128) from %stack.0)
- ; EXPAND-NEXT: $p0 = PTRUE_B 31, implicit $vg
- ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p0, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
+ ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
+ ; EXPAND-NEXT: $p0 = CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
;
; EXPAND-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.13)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
@@ -299,37 +287,26 @@ body: |
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
@@ -509,37 +486,26 @@ body: |
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.8)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.7)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.6)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.5)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.4)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.3)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.2)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 12, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.14)
@@ -754,37 +720,26 @@ body: |
; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.20)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.17)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p11 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 5 :: (load (s128) from %stack.16)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p10 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 6 :: (load (s128) from %stack.15)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p9 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 7 :: (load (s128) from %stack.14)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p8 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 8 :: (load (s128) from %stack.13)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p7 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 9 :: (load (s128) from %stack.12)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p6 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 10 :: (load (s128) from %stack.11)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p5 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z24 = frame-destroy LDR_ZXI $sp, 11 :: (load (s128) from %stack.10)
- ; EXPAND-NEXT: $p1 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p4 = frame-destroy CMPNE_PPzZI_B $p1, $z24, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 20, implicit $vg
; EXPAND-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.22)
@@ -953,37 +908,26 @@ body: |
; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p15 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.12)
- ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p14 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.11)
- ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p13 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.10)
- ; EXPAND-NEXT: $p4 = frame-destroy PTRUE_B 31, implicit $vg
; EXPAND-NEXT: $p12 = frame-destroy CMPNE_PPzZI_B $p4, $z0, 0, implicit-def $nzcv, implicit-def $nzcv
; EXPAND-NEXT: $z0 = frame-destroy LDR_ZXI $sp, 4 :: (load (s128) from %stack.9)
- ; EXPAND-NEXT: $...
[truncated]
|
SamTebbs33
approved these changes
Feb 4, 2025
MacDue
added a commit
to MacDue/llvm-project
that referenced
this pull request
Feb 10, 2025
…vm#125523) Currently, each expansion of `FILL_PPR_FROM_ZPR_SLOT_PSEUDO` creates a new ptrue instruction. This patch adds a simple method to reuse a previous ptrue instruction when expanding back-to-back fill pseudos.
swift-ci
pushed a commit
to swiftlang/llvm-project
that referenced
this pull request
Feb 11, 2025
…vm#125523) Currently, each expansion of `FILL_PPR_FROM_ZPR_SLOT_PSEUDO` creates a new ptrue instruction. This patch adds a simple method to reuse a previous ptrue instruction when expanding back-to-back fill pseudos.
Icohedron
pushed a commit
to Icohedron/llvm-project
that referenced
this pull request
Feb 11, 2025
…vm#125523) Currently, each expansion of `FILL_PPR_FROM_ZPR_SLOT_PSEUDO` creates a new ptrue instruction. This patch adds a simple method to reuse a previous ptrue instruction when expanding back-to-back fill pseudos.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Currently, each expansion of
FILL_PPR_FROM_ZPR_SLOT_PSEUDOcreates a new ptrue instruction. This patch adds a simple method to reuse a previous ptrue instruction when expanding back-to-back fill pseudos.