-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[RISCV][llvm] Handle vector callee saved register correctly #149467
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
In TargetFrameLowering::determineCalleeSaves, a vector register is marked as saved if any of its subregisters is clobbered; this is not correct for vector registers. We want a vector register to be marked as saved only if all of its subregisters are clobbered (for example, v24m2 should be marked only if both v24 and v25 are marked). This patch handles vector callee-saved registers in the target hook.
|
@llvm/pr-subscribers-backend-risc-v Author: Brandon Wu (4vtomat) Changes: In TargetFrameLowering::determineCalleeSaves, any vector register is marked […] Patch is 319.62 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/149467.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td
index cbf039edec273..4c303a93c7349 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.td
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td
@@ -56,19 +56,21 @@ def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
(sequence "F%u_D", 0, 31))>;
+defvar VREGS = (add (sequence "V%u", 0, 31),
+ (sequence "V%uM2", 0, 31, 2),
+ (sequence "V%uM4", 0, 31, 4),
+ (sequence "V%uM8", 0, 31, 8));
+
// Same as CSR_Interrupt, but including all vector registers.
-def CSR_XLEN_V_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
- (sequence "V%u", 0, 31))>;
+def CSR_XLEN_V_Interrupt: CalleeSavedRegs<(add CSR_Interrupt, VREGS)>;
// Same as CSR_Interrupt, but including all 32-bit FP registers and all vector
// registers.
-def CSR_XLEN_F32_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F32_Interrupt,
- (sequence "V%u", 0, 31))>;
+def CSR_XLEN_F32_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F32_Interrupt, VREGS)>;
// Same as CSR_Interrupt, but including all 64-bit FP registers and all vector
// registers.
-def CSR_XLEN_F64_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F64_Interrupt,
- (sequence "V%u", 0, 31))>;
+def CSR_XLEN_F64_V_Interrupt: CalleeSavedRegs<(add CSR_XLEN_F64_Interrupt, VREGS)>;
// Same as CSR_Interrupt, but excluding X16-X31.
def CSR_Interrupt_RVE : CalleeSavedRegs<(sub CSR_Interrupt,
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 6c8e3da80b932..99e7da57c8ee7 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1515,10 +1515,53 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
return Offset;
}
+static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
+ const Register &Reg) {
+ MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
+ // If it's not a grouped vector register, it doesn't have subregister, so
+ // the base register is just itself.
+ if (BaseReg == RISCV::NoRegister)
+ BaseReg = Reg;
+ return BaseReg;
+}
+
void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ // In TargetFrameLowering::determineCalleeSaves, any vector register is marked
+ // as saved if any of its subregister is clobbered, this is not correct in
+ // vector registers. We only want the vector register to be marked as saved
+ // only if all of its subregisters are clobbered.
+ // For example:
+ // Original behavior: If v24 is marked, v24m2, v24m4, v24m8 are also marked.
+ // Correct behavior: v24m2 is marked only if v24 and v25 are marked.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
+ const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
+ for (unsigned i = 0; CSRegs[i]; ++i) {
+ unsigned CSReg = CSRegs[i];
+ // Only vector registers need special care.
+ if (!RISCV::VRRegClass.contains(getRVVBaseRegister(TRI, CSReg)))
+ continue;
+
+ SavedRegs.reset(CSReg);
+
+ auto SubRegs = TRI.subregs(CSReg);
+ // Set the register and it's all subregisters.
+ if (!MRI.def_empty(CSReg) || MRI.getUsedPhysRegsMask().test(CSReg)) {
+ SavedRegs.set(CSReg);
+ llvm::for_each(SubRegs, [&](unsigned Reg) { return SavedRegs.set(Reg); });
+ }
+
+ // Combine to super register if all of its subregisters are marked.
+ if (!SubRegs.empty() && llvm::all_of(SubRegs, [&](unsigned Reg) {
+ return SavedRegs.test(Reg);
+ }))
+ SavedRegs.set(CSReg);
+ }
+
// Unconditionally spill RA and FP only if the function uses a frame
// pointer.
if (hasFP(MF)) {
@@ -2107,16 +2150,6 @@ static unsigned getCalleeSavedRVVNumRegs(const Register &BaseReg) {
: 8;
}
-static MCRegister getRVVBaseRegister(const RISCVRegisterInfo &TRI,
- const Register &Reg) {
- MCRegister BaseReg = TRI.getSubReg(Reg, RISCV::sub_vrm1_0);
- // If it's not a grouped vector register, it doesn't have subregister, so
- // the base register is just itself.
- if (BaseReg == RISCV::NoRegister)
- BaseReg = Reg;
- return BaseReg;
-}
-
void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, bool HasFP) const {
MachineFunction *MF = MBB.getParent();
diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr.ll b/llvm/test/CodeGen/RISCV/interrupt-attr.ll
index e278b8d0b53b2..472b9031a5cae 100644
--- a/llvm/test/CodeGen/RISCV/interrupt-attr.ll
+++ b/llvm/test/CodeGen/RISCV/interrupt-attr.ll
@@ -794,498 +794,46 @@ define void @foo_with_call() #1 {
; CHECK-RV32-V-NEXT: slli a0, a0, 5
; CHECK-RV32-V-NEXT: sub sp, sp, a0
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 5
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v0, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v1, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v2, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v3, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v4, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v5, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v6, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: mv a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 1
; CHECK-RV32-V-NEXT: add a0, a0, a1
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v7, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v10, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v12, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v13, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v14, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v0, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 4
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v15, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 4
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v16, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v17, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v18, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v19, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v20, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v21, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v22, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v23, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 3
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v25, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v26, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v27, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v28, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v29, (a0) # vscale x 8-byte Folded Spill
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vs1r.v v30, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: addi a0, sp, 16
-; CHECK-RV32-V-NEXT: vs1r.v v31, (a0) # vscale x 8-byte Folded Spill
+; CHECK-RV32-V-NEXT: vs8r.v v24, (a0) # vscale x 64-byte Folded Spill
; CHECK-RV32-V-NEXT: call otherfoo
; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a1, a0, 5
-; CHECK-RV32-V-NEXT: sub a0, a1, a0
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v1, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v2, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v3, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v4, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v5, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 3
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v6, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
; CHECK-RV32-V-NEXT: slli a0, a0, 3
; CHECK-RV32-V-NEXT: mv a1, a0
; CHECK-RV32-V-NEXT: slli a0, a0, 1
; CHECK-RV32-V-NEXT: add a0, a0, a1
; CHECK-RV32-V-NEXT: add a0, sp, a0
; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 2
-; CHECK-RV32-V-NEXT: add a0, a0, a1
-; CHECK-RV32-V-NEXT: add a0, sp, a0
-; CHECK-RV32-V-NEXT: addi a0, a0, 16
-; CHECK-RV32-V-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
-; CHECK-RV32-V-NEXT: csrr a0, vlenb
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: mv a1, a0
-; CHECK-RV32-V-NEXT: slli a0, a0, 1
-; CHECK-RV32-V-NEXT: add a1, a1, ...
[truncated]
|
lenary
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice improvement!
wangpc-pp
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
topperc
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
Thanks! |
In TargetFrameLowering::determineCalleeSaves, a vector register is marked as saved if any of its subregisters is clobbered; this is not correct for vector registers. We want a vector register to be marked as saved only if all of its subregisters are clobbered. This patch handles vector callee-saved registers in the target hook.
In TargetFrameLowering::determineCalleeSaves, a vector register is marked
as saved if any of its subregisters is clobbered; this is not correct for
vector registers. We want a vector register to be marked as saved
only if all of its subregisters are clobbered.
This patch handles vector callee-saved registers in the target hook.