diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 0656bfbef6b35..4d26f77d4ed2c 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -56,6 +56,8 @@ class RISCVExpandPseudo : public MachineFunctionPass { MachineBasicBlock::iterator MBBI); bool expandRV32ZdinxLoad(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); + bool expandPseudoReadVLENBViaVSETVLIX0(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); #ifndef NDEBUG unsigned getInstSizeInBytes(const MachineFunction &MF) const { unsigned Size = 0; @@ -164,6 +166,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB, case RISCV::PseudoVMSET_M_B64: // vmset.m vd => vmxnor.mm vd, vd, vd return expandVMSET_VMCLR(MBB, MBBI, RISCV::VMXNOR_MM); + case RISCV::PseudoReadVLENBViaVSETVLIX0: + return expandPseudoReadVLENBViaVSETVLIX0(MBB, MBBI); } return false; @@ -415,6 +419,24 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB, return true; } +bool RISCVExpandPseudo::expandPseudoReadVLENBViaVSETVLIX0( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { + DebugLoc DL = MBBI->getDebugLoc(); + Register Dst = MBBI->getOperand(0).getReg(); + unsigned Mul = MBBI->getOperand(1).getImm(); + RISCVVType::VLMUL VLMUL = RISCVVType::encodeLMUL(Mul, /*Fractional=*/false); + unsigned VTypeImm = RISCVVType::encodeVTYPE( + VLMUL, /*SEW=*/8, /*TailAgnostic=*/true, /*MaskAgnostic=*/true); + + BuildMI(MBB, MBBI, DL, TII->get(RISCV::PseudoVSETVLIX0)) + .addReg(Dst, RegState::Define) + .addReg(RISCV::X0, RegState::Kill) + .addImm(VTypeImm); + + MBBI->eraseFromParent(); + return true; +} + class RISCVPreRAExpandPseudo : public MachineFunctionPass { public: const RISCVSubtarget *STI; diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 21119f624339c..46f1a2f9e302d 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td 
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1534,6 +1534,12 @@ def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "V def TuneVXRMPipelineFlush : SubtargetFeature<"vxrm-pipeline-flush", "HasVXRMPipelineFlush", "true", "VXRM writes causes pipeline flush">; +def TunePreferVsetvliOverReadVLENB + : SubtargetFeature<"prefer-vsetvli-over-read-vlenb", + "PreferVsetvliOverReadVLENB", + "true", + "Prefer vsetvli over read vlenb CSR to calculate VLEN">; + // Assume that lock-free native-width atomics are available, even if the target // and operating system combination would not usually provide them. The user // is responsible for providing any necessary __sync implementations. Code diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index e1314d4fee8a0..fd7471599f35c 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -2195,6 +2195,17 @@ bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { const MachineFunction *MF = MBB.getParent(); const auto *RVFI = MF->getInfo(); + // Make sure VTYPE and VL are not live-in since we will use vsetvli in the + // prologue to get the VLEN, and that will clobber these registers. + // + // We could also check whether the stack contains objects with scalable + // vector type, but that would require iterating over all the stack objects. + // That is probably not worthwhile since the situation is rare; a further + // check can be added in the future if it proves necessary. 
+ if (STI.preferVsetvliOverReadVLENB() && + (MBB.isLiveIn(RISCV::VTYPE) || MBB.isLiveIn(RISCV::VL))) + return false; + if (!RVFI->useSaveRestoreLibCalls(*MF)) return true; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 6d3c005583c29..be1b36e9d77c2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -6049,6 +6049,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in { [(set GPR:$rd, (riscv_read_vlenb))]>, PseudoInstExpansion<(CSRRS GPR:$rd, SysRegVLENB.Encoding, X0)>, Sched<[WriteRdVLENB]>; + let Defs = [VL, VTYPE] in { + def PseudoReadVLENBViaVSETVLIX0 : Pseudo<(outs GPR:$rd), (ins uimm5:$shamt), + []>, + Sched<[WriteVSETVLI, ReadVSETVLI]>; + } } let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1, diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index 12d54313a96ab..07ae6c79acc1b 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -226,21 +226,48 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB, assert(isInt<32>(ScalableValue / (RISCV::RVVBitsPerBlock / 8)) && "Expect the number of vector registers within 32-bits."); uint32_t NumOfVReg = ScalableValue / (RISCV::RVVBitsPerBlock / 8); - BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg) - .setMIFlag(Flag); - - if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() && - (NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) { - unsigned Opc = NumOfVReg == 2 ? RISCV::SH1ADD : - (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD); - BuildMI(MBB, II, DL, TII->get(Opc), DestReg) - .addReg(ScratchReg, RegState::Kill).addReg(SrcReg) + // Only use vsetvli rather than vlenb if adjusting in the prologue or + // epilogue, otherwise it may disturb the VTYPE and VL status. 
+ bool IsPrologueOrEpilogue = + Flag == MachineInstr::FrameSetup || Flag == MachineInstr::FrameDestroy; + bool UseVsetvliRatherThanVlenb = + IsPrologueOrEpilogue && ST.preferVsetvliOverReadVLENB(); + if (UseVsetvliRatherThanVlenb && (NumOfVReg == 1 || NumOfVReg == 2 || + NumOfVReg == 4 || NumOfVReg == 8)) { + BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENBViaVSETVLIX0), + ScratchReg) + .addImm(NumOfVReg) .setMIFlag(Flag); - } else { - TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag); BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg) - .addReg(SrcReg).addReg(ScratchReg, RegState::Kill) + .addReg(SrcReg) + .addReg(ScratchReg, RegState::Kill) .setMIFlag(Flag); + } else { + if (UseVsetvliRatherThanVlenb) + BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENBViaVSETVLIX0), + ScratchReg) + .addImm(1) + .setMIFlag(Flag); + else + BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg) + .setMIFlag(Flag); + + if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() && + (NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) { + unsigned Opc = NumOfVReg == 2 + ? RISCV::SH1ADD + : (NumOfVReg == 4 ? 
RISCV::SH2ADD : RISCV::SH3ADD); + BuildMI(MBB, II, DL, TII->get(Opc), DestReg) + .addReg(ScratchReg, RegState::Kill) + .addReg(SrcReg) + .setMIFlag(Flag); + } else { + TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag); + BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg) + .addReg(SrcReg) + .addReg(ScratchReg, RegState::Kill) + .setMIFlag(Flag); + } } SrcReg = DestReg; KillSrcReg = true; diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll index 7fe6bd24a2552..cba507874a32e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll +++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll @@ -5,6 +5,12 @@ ; RUN: | FileCheck %s --check-prefixes=CHECK,ZBA ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,NOMUL +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+prefer-vsetvli-over-read-vlenb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK-NOZBA-VSETVLI +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zba,+prefer-vsetvli-over-read-vlenb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK-ZBA-VSETVLI +; RUN: llc -mtriple=riscv64 -mattr=+v,+prefer-vsetvli-over-read-vlenb -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK-NOMUL-VSETVLI define void @lmul1() nounwind { ; CHECK-LABEL: lmul1: @@ -14,6 +20,30 @@ define void @lmul1() nounwind { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul1: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul1: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; 
CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul1: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v = alloca ret void } @@ -47,6 +77,30 @@ define void @lmul2() nounwind { ; NOMUL-NEXT: slli a0, a0, 1 ; NOMUL-NEXT: add sp, sp, a0 ; NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul2: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul2: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul2: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v = alloca ret void } @@ -67,6 +121,51 @@ define void @lmul4() nounwind { ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul4: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded 
Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul4: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul4: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v = alloca ret void } @@ -87,6 +186,51 @@ define void @lmul8() nounwind { ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 80 ; 
CHECK-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul8: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul8: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul8: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 
8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v = alloca ret void } @@ -120,6 +264,30 @@ define void @lmul1_and_2() nounwind { ; NOMUL-NEXT: slli a0, a0, 2 ; NOMUL-NEXT: add sp, sp, a0 ; NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul1_and_2: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul1_and_2: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul1_and_2: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -141,6 +309,51 @@ define void @lmul2_and_4() nounwind { ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul2_and_4: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; 
CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul2_and_4: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul2_and_4: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -162,6 +375,51 @@ define void @lmul1_and_4() nounwind { ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul1_and_4: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; 
CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul1_and_4: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul1_and_4: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -201,6 +459,40 @@ define void @lmul2_and_1() nounwind { ; NOMUL-NEXT: add a0, a1, a0 ; NOMUL-NEXT: add sp, sp, a0 ; 
NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul2_and_1: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: slli a1, a0, 1 +; CHECK-NOZBA-VSETVLI-NEXT: add a0, a1, a0 +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: slli a1, a0, 1 +; CHECK-NOZBA-VSETVLI-NEXT: add a0, a1, a0 +; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul2_and_1: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0 +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0 +; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul2_and_1: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a1, a0, 1 +; CHECK-NOMUL-VSETVLI-NEXT: add a0, a1, a0 +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a1, a0, 1 +; CHECK-NOMUL-VSETVLI-NEXT: add a0, a1, a0 +; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -259,6 +551,59 @@ define void @lmul4_and_1() nounwind { ; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; NOMUL-NEXT: addi sp, sp, 48 ; NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul4_and_1: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: 
li a1, 6 +; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1 +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul4_and_1: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 1 +; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0 +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul4_and_1: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1 +; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0 +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1 +; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1 +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: 
addi sp, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -317,6 +662,59 @@ define void @lmul4_and_2() nounwind { ; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; NOMUL-NEXT: addi sp, sp, 48 ; NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul4_and_2: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: li a1, 6 +; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1 +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul4_and_2: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 1 +; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0 +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul4_and_2: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; 
CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1 +; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0 +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1 +; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1 +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca ret void @@ -377,6 +775,61 @@ define void @lmul4_and_2_x2_0() nounwind { ; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; NOMUL-NEXT: addi sp, sp, 48 ; NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul4_and_2_x2_0: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: li a1, 14 +; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1 +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul4_and_2_x2_0: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: 
vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: li a1, 14 +; CHECK-ZBA-VSETVLI-NEXT: mul a0, a0, a1 +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul4_and_2_x2_0: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1 +; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0 +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1 +; CHECK-NOMUL-VSETVLI-NEXT: add a1, a1, a0 +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1 +; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1 +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca %v3 = alloca @@ -437,6 +890,59 @@ define void @lmul4_and_2_x2_1() nounwind { ; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; NOMUL-NEXT: addi sp, sp, 48 ; NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul4_and_2_x2_1: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; 
CHECK-NOZBA-VSETVLI-NEXT: li a1, 12 +; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1 +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul4_and_2_x2_1: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 2 +; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0 +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul4_and_2_x2_1: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 2 +; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0 +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1 +; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1 +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded 
Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v3 = alloca %v2 = alloca @@ -486,6 +992,42 @@ define void @gpr_and_lmul1_and_2() nounwind { ; NOMUL-NEXT: add sp, sp, a0 ; NOMUL-NEXT: addi sp, sp, 16 ; NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: gpr_and_lmul1_and_2: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -16 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: li a0, 3 +; CHECK-NOZBA-VSETVLI-NEXT: sd a0, 8(sp) +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 16 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: gpr_and_lmul1_and_2: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -16 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: li a0, 3 +; CHECK-ZBA-VSETVLI-NEXT: sd a0, 8(sp) +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 16 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: gpr_and_lmul1_and_2: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -16 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: li a0, 3 +; CHECK-NOMUL-VSETVLI-NEXT: sd a0, 8(sp) +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 16 +; CHECK-NOMUL-VSETVLI-NEXT: ret %x1 = alloca i64 %v1 = alloca %v2 = alloca @@ -511,6 +1053,57 @@ define void @gpr_and_lmul1_and_4() nounwind { ; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 48 ; CHECK-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: 
gpr_and_lmul1_and_4: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-NOZBA-VSETVLI-NEXT: li a0, 3 +; CHECK-NOZBA-VSETVLI-NEXT: sd a0, 8(sp) +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: gpr_and_lmul1_and_4: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32 +; CHECK-ZBA-VSETVLI-NEXT: li a0, 3 +; CHECK-ZBA-VSETVLI-NEXT: sd a0, 8(sp) +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: gpr_and_lmul1_and_4: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32 +; 
CHECK-NOMUL-VSETVLI-NEXT: li a0, 3 +; CHECK-NOMUL-VSETVLI-NEXT: sd a0, 8(sp) +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48 +; CHECK-NOMUL-VSETVLI-NEXT: ret %x1 = alloca i64 %v1 = alloca %v2 = alloca @@ -534,6 +1127,54 @@ define void @lmul_1_2_4_8() nounwind { ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul_1_2_4_8: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: slli a0, a0, 4 +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul_1_2_4_8: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 4 +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80 +; 
CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul_1_2_4_8: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 4 +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca %v4 = alloca @@ -557,6 +1198,54 @@ define void @lmul_1_2_4_8_x2_0() nounwind { ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul_1_2_4_8_x2_0: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: slli a0, a0, 5 +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul_1_2_4_8_x2_0: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; 
CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 5 +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul_1_2_4_8_x2_0: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 5 +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca %v3 = alloca @@ -584,6 +1273,54 @@ define void @lmul_1_2_4_8_x2_1() nounwind { ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 80 ; CHECK-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul_1_2_4_8_x2_1: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: slli a0, a0, 5 +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80 +; 
CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul_1_2_4_8_x2_1: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 5 +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul_1_2_4_8_x2_1: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 5 +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v8 = alloca %v7 = alloca %v6 = alloca @@ -624,6 +1361,30 @@ define void @masks() nounwind { ; NOMUL-NEXT: slli a0, a0, 2 ; NOMUL-NEXT: add sp, sp, a0 ; NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: masks: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; 
CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: masks: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: masks: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca %v4 = alloca @@ -684,6 +1445,59 @@ define void @lmul_8_x5() nounwind { ; NOMUL-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; NOMUL-NEXT: addi sp, sp, 80 ; NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul_8_x5: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: li a1, 40 +; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1 +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul_8_x5: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd 
s0, 64(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 3 +; CHECK-ZBA-VSETVLI-NEXT: sh2add a0, a0, a0 +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul_8_x5: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 3 +; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0 +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 2 +; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1 +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca %v3 = alloca @@ -745,6 +1559,59 @@ define void @lmul_8_x9() nounwind { ; NOMUL-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; NOMUL-NEXT: addi sp, sp, 80 ; NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul_8_x9: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; 
CHECK-NOZBA-VSETVLI-NEXT: li a1, 72 +; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1 +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul_8_x9: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 3 +; CHECK-ZBA-VSETVLI-NEXT: sh3add a0, a0, a0 +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul_8_x9: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 3 +; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0 +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 3 +; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1 +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64 +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload +; 
CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca %v3 = alloca @@ -851,6 +1718,100 @@ define void @lmul_16_align() nounwind { ; NOMUL-NEXT: ld s0, 128(sp) # 8-byte Folded Reload ; NOMUL-NEXT: addi sp, sp, 144 ; NOMUL-NEXT: ret +; +; CHECK-NOZBA-VSETVLI-LABEL: lmul_16_align: +; CHECK-NOZBA-VSETVLI: # %bb.0: +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -144 +; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 144 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: li a1, 24 +; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1 +; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -128 +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: vmv.v.i v8, 0 +; CHECK-NOZBA-VSETVLI-NEXT: csrr a0, vlenb +; CHECK-NOZBA-VSETVLI-NEXT: add a0, sp, a0 +; CHECK-NOZBA-VSETVLI-NEXT: addi a0, a0, 128 +; CHECK-NOZBA-VSETVLI-NEXT: csrr a1, vlenb +; CHECK-NOZBA-VSETVLI-NEXT: vs8r.v v8, (a0) +; CHECK-NOZBA-VSETVLI-NEXT: slli a1, a1, 3 +; CHECK-NOZBA-VSETVLI-NEXT: add a0, a0, a1 +; CHECK-NOZBA-VSETVLI-NEXT: vs8r.v v8, (a0) +; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NOZBA-VSETVLI-NEXT: vmv.v.i v8, 0 +; CHECK-NOZBA-VSETVLI-NEXT: addi a0, sp, 128 +; CHECK-NOZBA-VSETVLI-NEXT: vs1r.v v8, (a0) +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -144 +; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 144 +; CHECK-NOZBA-VSETVLI-NEXT: ret +; +; CHECK-ZBA-VSETVLI-LABEL: lmul_16_align: +; CHECK-ZBA-VSETVLI: # %bb.0: +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -144 +; CHECK-ZBA-VSETVLI-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; CHECK-ZBA-VSETVLI-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; 
CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 144 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 3 +; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0 +; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -128 +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: vmv.v.i v8, 0 +; CHECK-ZBA-VSETVLI-NEXT: csrr a0, vlenb +; CHECK-ZBA-VSETVLI-NEXT: add a0, sp, a0 +; CHECK-ZBA-VSETVLI-NEXT: addi a0, a0, 128 +; CHECK-ZBA-VSETVLI-NEXT: csrr a1, vlenb +; CHECK-ZBA-VSETVLI-NEXT: vs8r.v v8, (a0) +; CHECK-ZBA-VSETVLI-NEXT: sh3add a0, a1, a0 +; CHECK-ZBA-VSETVLI-NEXT: vs8r.v v8, (a0) +; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-ZBA-VSETVLI-NEXT: vmv.v.i v8, 0 +; CHECK-ZBA-VSETVLI-NEXT: addi a0, sp, 128 +; CHECK-ZBA-VSETVLI-NEXT: vs1r.v v8, (a0) +; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -144 +; CHECK-ZBA-VSETVLI-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 144 +; CHECK-ZBA-VSETVLI-NEXT: ret +; +; CHECK-NOMUL-VSETVLI-LABEL: lmul_16_align: +; CHECK-NOMUL-VSETVLI: # %bb.0: +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -144 +; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 128(sp) # 8-byte Folded Spill +; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 144 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 3 +; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0 +; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1 +; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1 +; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -128 +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: vmv.v.i v8, 0 +; CHECK-NOMUL-VSETVLI-NEXT: csrr a0, vlenb +; CHECK-NOMUL-VSETVLI-NEXT: add a0, sp, a0 +; CHECK-NOMUL-VSETVLI-NEXT: addi a0, a0, 128 +; 
CHECK-NOMUL-VSETVLI-NEXT: csrr a1, vlenb +; CHECK-NOMUL-VSETVLI-NEXT: vs8r.v v8, (a0) +; CHECK-NOMUL-VSETVLI-NEXT: slli a1, a1, 3 +; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1 +; CHECK-NOMUL-VSETVLI-NEXT: vs8r.v v8, (a0) +; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NOMUL-VSETVLI-NEXT: vmv.v.i v8, 0 +; CHECK-NOMUL-VSETVLI-NEXT: addi a0, sp, 128 +; CHECK-NOMUL-VSETVLI-NEXT: vs1r.v v8, (a0) +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -144 +; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 128(sp) # 8-byte Folded Reload +; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 144 +; CHECK-NOMUL-VSETVLI-NEXT: ret %v1 = alloca %v2 = alloca store zeroinitializer, ptr %v1 diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll index 1205ff17d113e..c9e11de38007a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -5,6 +5,12 @@ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zcmp -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2-ZCMP %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+prefer-vsetvli-over-read-vlenb -O0 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O0-VSETVLI %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+prefer-vsetvli-over-read-vlenb -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zcmp,+prefer-vsetvli-over-read-vlenb -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2-ZCMP-VSETVLI %s @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 @@ -131,6 +137,123 @@ define @foo( %a, @llvm.riscv.vfadd.nxv1f64.nxv1f64( undef, %a, %b, i32 7, i32 %gvl) %call = call signext i32 @puts(ptr @.str) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll index f0cd067fd0448..80b6c45b9c1ac 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll @@ -3,6 +3,10 @@ ; RUN: | FileCheck --check-prefix=SPILL-O0 %s ; RUN: llc -mtriple=riscv32 -mattr=+v -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+prefer-vsetvli-over-read-vlenb -O0 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O0-VSETVLI %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+prefer-vsetvli-over-read-vlenb -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s define @spill_lmul_mf2( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_mf2: @@ -35,6 +39,37 @@ define @spill_lmul_mf2( %va) nounwind { ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_lmul_mf2: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_lmul_mf2: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; 
SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() @@ -73,6 +108,37 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_lmul_1: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_lmul_1: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: call void asm sideeffect "", 
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() @@ -115,6 +181,37 @@ define @spill_lmul_2( %va) nounwind { ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_lmul_2: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_lmul_2: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() @@ -157,6 +254,37 @@ define @spill_lmul_4( %va) 
nounwind { ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_lmul_4: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_lmul_4: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() @@ -199,6 +327,37 @@ define @spill_lmul_8( %va) nounwind { ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_lmul_8: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, 
e8, m8, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_lmul_8: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll index adb15f02e33a4..485015577b8af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -5,6 +5,10 @@ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-max=128 -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+prefer-vsetvli-over-read-vlenb -O0 < %s \ +; RUN: | FileCheck 
--check-prefix=SPILL-O0-VSETVLI %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+prefer-vsetvli-over-read-vlenb -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s define @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind { ; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32: @@ -74,6 +78,50 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8_v9 +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vmv1r.v v8, v9 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, 
(a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", , 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) undef, ptr %base, i32 %vl, i32 5) call void asm sideeffect "", @@ -150,6 +198,50 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8_v9 +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, tu, ma +; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, tu, ma +; SPILL-O0-VSETVLI-NEXT: vmv1r.v v8, v9 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; 
SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", , 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) undef, ptr %base, i32 %vl, i32 5) call void asm sideeffect "", @@ -231,6 +323,53 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m2_v10m2 +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vmv2r.v v8, v10 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP 
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m4, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 +; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", , 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) undef, ptr %base, i32 %vl, i32 5) call void asm sideeffect "", @@ -312,6 +451,53 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi 
sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m4, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m4_v12m4 +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, tu, ma +; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, tu, ma +; SPILL-O0-VSETVLI-NEXT: vmv4r.v v8, v12 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2 +; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2 +; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 
+; SPILL-O2-VSETVLI-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", , 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) undef, ptr %base, i32 %vl, i32 5) call void asm sideeffect "", @@ -403,6 +589,61 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg3_nxv4i32: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m2_v10m2_v12m2 +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vlseg3e32.v v8, (a0) +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vmv2r.v v8, v10 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg3_nxv4i32: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; SPILL-O2-VSETVLI-NEXT: li a3, 6 +; SPILL-O2-VSETVLI-NEXT: mul a2, a2, a3 +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: vlseg3e32.v v8, (a0) +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 +; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size 
Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O2-VSETVLI-NEXT: li a1, 6 +; SPILL-O2-VSETVLI-NEXT: mul a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", , 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) undef, ptr %base, i32 %vl, i32 5) call void asm sideeffect "", diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll index 2cd80ef79bd82..e885f25cf0b26 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll @@ -7,6 +7,12 @@ ; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zcmp -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2-ZCMP %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+prefer-vsetvli-over-read-vlenb -O0 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O0-VSETVLI %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+prefer-vsetvli-over-read-vlenb -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zcmp,+prefer-vsetvli-over-read-vlenb -O2 < 
%s \ +; RUN: | FileCheck --check-prefix=SPILL-O2-ZCMP-VSETVLI %s @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 @@ -147,6 +153,108 @@ define @foo( %a, @llvm.riscv.vfadd.nxv1f64.nxv1f64( undef, %a, %b, i64 7, i64 %gvl) %call = call signext i32 @puts(ptr @.str) diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll index 957a23f0069b8..c8397a2e57317 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll @@ -5,6 +5,10 @@ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -riscv-v-vector-bits-max=128 -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+prefer-vsetvli-over-read-vlenb -O0 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O0-VSETVLI %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+prefer-vsetvli-over-read-vlenb -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s define @spill_lmul_1( %va) nounwind { ; SPILL-O0-LABEL: spill_lmul_1: @@ -50,6 +54,37 @@ define @spill_lmul_1( %va) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_lmul_1: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_lmul_1: 
+; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() @@ -105,6 +140,37 @@ define @spill_lmul_2( %va) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_lmul_2: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_lmul_2: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; 
SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() @@ -160,6 +226,37 @@ define @spill_lmul_4( %va) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_lmul_4: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_lmul_4: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, 
zero, e8, m4, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() @@ -215,6 +312,37 @@ define @spill_lmul_8( %va) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_lmul_8: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_lmul_8: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: call void asm sideeffect "", 
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll index ff0f1d7748668..092496e613cf7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll @@ -5,6 +5,10 @@ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+prefer-vsetvli-over-read-vlenb -O0 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O0-VSETVLI %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+prefer-vsetvli-over-read-vlenb -O2 < %s \ +; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s define @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind { ; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32: @@ -74,6 +78,50 @@ define @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8_v9 +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vmv1r.v v8, v9 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: 
vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv1i32: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", , 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", , 2) undef, ptr %base, i64 %vl, i64 5) call void asm sideeffect "", @@ -150,6 +198,50 @@ define @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8_v9 +; SPILL-O0-VSETVLI-NEXT: vsetvli 
zero, a1, e32, m1, tu, ma +; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, tu, ma +; SPILL-O0-VSETVLI-NEXT: vmv1r.v v8, v9 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv2i32: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", , 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", , 2) undef, ptr %base, i64 %vl, i64 5) call void asm sideeffect "", @@ -231,6 +323,53 @@ define @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind { ; 
SPILL-O2-VLEN128-NEXT: addi sp, sp, 64 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m2_v10m2 +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vmv2r.v v8, v10 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv4i32: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m4, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 +; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload +; 
SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", , 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", , 2) undef, ptr %base, i64 %vl, i64 5) call void asm sideeffect "", @@ -312,6 +451,53 @@ define @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m4, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m4_v12m4 +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, tu, ma +; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, tu, ma +; SPILL-O0-VSETVLI-NEXT: vmv4r.v v8, v12 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv8i32: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0) +; 
SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2 +; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2 +; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", , 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", , 2) undef, ptr %base, i64 %vl, i64 5) call void asm sideeffect "", @@ -403,6 +589,61 @@ define @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind { ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96 ; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16 ; SPILL-O2-VLEN128-NEXT: ret +; +; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg3_nxv4i32: +; SPILL-O0-VSETVLI: # %bb.0: # %entry +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m2_v10m2_v12m2 +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vlseg3e32.v v8, (a0) +; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma +; SPILL-O0-VSETVLI-NEXT: vmv2r.v v8, v10 +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O0-VSETVLI-NEXT: #APP +; SPILL-O0-VSETVLI-NEXT: #NO_APP +; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O0-VSETVLI-NEXT: 
vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O0-VSETVLI-NEXT: ret +; +; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg3_nxv4i32: +; SPILL-O2-VSETVLI: # %bb.0: # %entry +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16 +; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; SPILL-O2-VSETVLI-NEXT: li a3, 6 +; SPILL-O2-VSETVLI-NEXT: mul a2, a2, a3 +; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2 +; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; SPILL-O2-VSETVLI-NEXT: vlseg3e32.v v8, (a0) +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 +; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill +; SPILL-O2-VSETVLI-NEXT: #APP +; SPILL-O2-VSETVLI-NEXT: #NO_APP +; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16 +; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb +; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload +; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; SPILL-O2-VSETVLI-NEXT: li a1, 6 +; SPILL-O2-VSETVLI-NEXT: mul a0, a0, a1 +; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0 +; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16 +; SPILL-O2-VSETVLI-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", , 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", , 3) undef, ptr %base, i64 %vl, i64 5) call void asm sideeffect "",