diff --git a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
index ee90868d252e4..3980b7dd2be83 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp
@@ -13,11 +13,8 @@
 // The reason why we need to do this:
 // 1. When tracking register pressure, we don't track physical registers.
 // 2. We have a RegisterClass for mask register (which is `VMV0`), but we don't
-//    use it in most RVV pseudos (only used in inline asm constraint and add/sub
-//    with carry instructions). Instead, we use physical register V0 directly
-//    and insert a `$v0 = COPY ...` before the use. And, there is a fundamental
-//    issue in register allocator when handling RegisterClass with only one
-//    physical register, so we can't simply replace V0 with VMV0.
+//    use it by the time we reach scheduling. Instead, we use physical
+//    register V0 directly and insert a `$v0 = COPY ...` before the use.
 // 3. For mask producers, we are using VR RegisterClass (we can allocate V0-V31
 //    to it). So if V0 is not available, there are still 31 available registers
 //    out there.
@@ -43,66 +40,24 @@ namespace llvm {
-static inline bool isVectorMaskProducer(const MachineInstr *MI) {
-  switch (RISCV::getRVVMCOpcode(MI->getOpcode())) {
-  // Vector Mask Instructions
-  case RISCV::VMAND_MM:
-  case RISCV::VMNAND_MM:
-  case RISCV::VMANDN_MM:
-  case RISCV::VMXOR_MM:
-  case RISCV::VMOR_MM:
-  case RISCV::VMNOR_MM:
-  case RISCV::VMORN_MM:
-  case RISCV::VMXNOR_MM:
-  case RISCV::VMSBF_M:
-  case RISCV::VMSIF_M:
-  case RISCV::VMSOF_M:
-  // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
-  case RISCV::VMADC_VV:
-  case RISCV::VMADC_VX:
-  case RISCV::VMADC_VI:
-  case RISCV::VMADC_VVM:
-  case RISCV::VMADC_VXM:
-  case RISCV::VMADC_VIM:
-  case RISCV::VMSBC_VV:
-  case RISCV::VMSBC_VX:
-  case RISCV::VMSBC_VVM:
-  case RISCV::VMSBC_VXM:
-  // Vector Integer Compare Instructions
-  case RISCV::VMSEQ_VV:
-  case RISCV::VMSEQ_VX:
-  case RISCV::VMSEQ_VI:
-  case RISCV::VMSNE_VV:
-  case RISCV::VMSNE_VX:
-  case RISCV::VMSNE_VI:
-  case RISCV::VMSLT_VV:
-  case RISCV::VMSLT_VX:
-  case RISCV::VMSLTU_VV:
-  case RISCV::VMSLTU_VX:
-  case RISCV::VMSLE_VV:
-  case RISCV::VMSLE_VX:
-  case RISCV::VMSLE_VI:
-  case RISCV::VMSLEU_VV:
-  case RISCV::VMSLEU_VX:
-  case RISCV::VMSLEU_VI:
-  case RISCV::VMSGTU_VX:
-  case RISCV::VMSGTU_VI:
-  case RISCV::VMSGT_VX:
-  case RISCV::VMSGT_VI:
-  // Vector Floating-Point Compare Instructions
-  case RISCV::VMFEQ_VV:
-  case RISCV::VMFEQ_VF:
-  case RISCV::VMFNE_VV:
-  case RISCV::VMFNE_VF:
-  case RISCV::VMFLT_VV:
-  case RISCV::VMFLT_VF:
-  case RISCV::VMFLE_VV:
-  case RISCV::VMFLE_VF:
-  case RISCV::VMFGT_VF:
-  case RISCV::VMFGE_VF:
-    return true;
-  }
-  return false;
+static bool isCopyToV0(const MachineInstr &MI) {
+  return MI.isCopy() && MI.getOperand(0).getReg() == RISCV::V0 &&
+         MI.getOperand(1).getReg().isVirtual() &&
+         MI.getOperand(1).getSubReg() == RISCV::NoSubRegister;
+}
+
+static bool isSoleUseCopyToV0(SUnit &SU) {
+  if (SU.Succs.size() != 1)
+    return false;
+  SDep &Dep = SU.Succs[0];
+  // Ignore dependencies other than data or strong ordering.
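+  // (Weak edges, e.g. cluster edges, are only scheduling hints that do not
+  // force ordering, so they cannot be the mask's single real use.)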
+ if (Dep.isWeak()) + return false; + + SUnit &DepSU = *Dep.getSUnit(); + if (DepSU.isBoundaryNode()) + return false; + return isCopyToV0(*DepSU.getInstr()); } class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation { @@ -119,7 +74,7 @@ class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation { if (MI->findRegisterUseOperand(RISCV::V0, TRI)) NearestUseV0SU = &SU; - if (NearestUseV0SU && NearestUseV0SU != &SU && isVectorMaskProducer(MI) && + if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU) && // For LMUL=8 cases, there will be more possibilities to spill. // FIXME: We should use RegPressureTracker to do fine-grained // controls. diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll index 86176761746cb..5ed78e0051c90 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll @@ -88,9 +88,12 @@ define i32 @test_v256i1(<256 x i1> %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v6, v0 @@ -98,26 +101,51 @@ define i32 @test_v256i1(<256 x i1> %x) { ; CHECK-NEXT: vslidedown.vi v5, v8, 8 ; CHECK-NEXT: vslidedown.vi v4, v0, 8 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v3, v8, 4 -; CHECK-NEXT: vslidedown.vi v2, v0, 4 +; CHECK-NEXT: vslidedown.vi v0, v5, 4 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v5, 4 -; CHECK-NEXT: vslidedown.vi v14, v4, 4 +; CHECK-NEXT: vslidedown.vi v0, v4, 4 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v7, 4 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v6, 4 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: 
vadd.vv v8, v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: csrr a0, vlenb @@ -125,29 +153,25 @@ define i32 @test_v256i1(<256 x i1> %x) { ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: vadd.vv v8, v8, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v5 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 -; CHECK-NEXT: vadd.vv v8, v24, v8 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vv v24, v0, v24 ; CHECK-NEXT: vadd.vv v8, v16, v8 @@ -156,7 +180,10 @@ define i32 @test_v256i1(<256 x i1> %x) { ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: add sp, sp, a1 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -250,29 +277,31 @@ define i32 @test_nxv128i1( %x) { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v7, v8 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v7, a1 -; CHECK-NEXT: vslidedown.vx v5, v0, a1 -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v4, v7, a0 -; CHECK-NEXT: vslidedown.vx v3, v0, a0 -; CHECK-NEXT: vmv1r.v v0, v5 +; CHECK-NEXT: vslidedown.vx v0, v0, a1 +; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a0 +; CHECK-NEXT: 
vsetvli a2, zero, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v6, a0 +; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v6, v7, a1 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v5, a0 ; CHECK-NEXT: vslidedown.vx v5, v6, a0 +; CHECK-NEXT: vslidedown.vx v4, v7, a0 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v3 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t @@ -309,125 +338,118 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x31, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 49 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v3, v10 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v6, v10 +; CHECK-NEXT: vmv1r.v v7, v9 +; CHECK-NEXT: vmv1r.v v5, v8 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vmv.v.i v24, 0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a2, a2, a0 +; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: srli a0, a1, 1 ; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v11, v0, a0 -; CHECK-NEXT: vslidedown.vx v12, v8, a0 -; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v2, v9, a1 -; CHECK-NEXT: vslidedown.vx v4, v0, a1 -; CHECK-NEXT: vslidedown.vx v1, v10, a1 -; CHECK-NEXT: vslidedown.vx v7, v8, a1 -; CHECK-NEXT: vslidedown.vx v6, v11, a1 -; CHECK-NEXT: vslidedown.vx v5, v12, a1 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vslidedown.vx v12, v11, a0 +; CHECK-NEXT: vslidedown.vx v13, v8, a0 +; CHECK-NEXT: vmv.v.v v0, v12 ; CHECK-NEXT: vmv8r.v v16, v24 -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 5 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 ; CHECK-NEXT: 
csrr a2, vlenb -; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a3, a3, a2 -; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v12, a1 +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 4 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v5 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v13, a1 +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v11, a1 +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v5, a1 +; CHECK-NEXT: vslidedown.vx v5, v7, a1 +; CHECK-NEXT: vslidedown.vx v4, v6, a1 +; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, mu +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t ; CHECK-NEXT: vadd.vv v8, v16, v8 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl1r.v v7, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v7, a0 -; CHECK-NEXT: vslidedown.vx v11, v3, a0 +; CHECK-NEXT: vslidedown.vx v11, v6, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v11, a1 ; CHECK-NEXT: vslidedown.vx v12, v8, a1 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vi v16, v16, 1, v0.t ; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: csrr a0, vlenb -; 
CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 @@ -436,27 +458,24 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t ; CHECK-NEXT: vadd.vv v8, v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v3 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 5 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 @@ -464,16 +483,12 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vi v24, v24, 1, v0.t ; CHECK-NEXT: vadd.vv v24, v24, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vv v16, v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 @@ -485,9 +500,8 @@ define i32 @test_nxv256i1( %x) { ; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a2, a2, a1 +; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: add sp, sp, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index 9bd1da2e53dce..eb40c133514fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -46,10 +46,11 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, ; CHECK-NEXT: vslideup.vi v12, v10, 2, v0.t ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 -; CHECK-NEXT: vmv.v.i v10, 12 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vslidedown.vi v8, v8, 6, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 ; CHECK-NEXT: ret %z = shufflevector <8 x float> %x, <8 x float> %y, <4 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index c628a0d620498..16bb2105f8680 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -698,15 +698,16 @@ define void @buildvec_seq_v9i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 73 ; 
CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vmv.v.i v8, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: li a1, 146 -; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 2, v0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret store <9 x i8> , ptr %x @@ -973,27 +974,27 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma ; RV32-NEXT: vmv.v.i v0, 15 -; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: li a0, 512 ; RV32-NEXT: li a1, 240 -; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: li a1, 15 -; RV32-NEXT: vmerge.vim v10, v9, -1, v0 +; RV32-NEXT: vmerge.vim v9, v8, -1, v0 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmv.v.i v12, 3 -; RV32-NEXT: slli a1, a1, 8 -; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmerge.vim v12, v12, 0, v0 -; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: li a1, 15 ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma -; RV32-NEXT: vmerge.vim v10, v9, -1, v0 -; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vmerge.vim v9, v8, -1, v0 +; RV32-NEXT: slli a1, a1, 8 +; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmerge.vim v12, v12, 1, v0 -; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma -; RV32-NEXT: vmerge.vim v8, v9, -1, v0 +; RV32-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmerge.vim v8, v12, 2, v0 @@ -1003,25 +1004,23 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.i v0, 3 -; RV64V-NEXT: vmv.v.i v9, 0 +; RV64V-NEXT: vmv.v.i v8, 0 ; RV64V-NEXT: li a0, 512 -; RV64V-NEXT: vmv.v.i v8, 12 -; RV64V-NEXT: li a1, 48 -; RV64V-NEXT: vmerge.vim v10, v9, -1, v0 +; RV64V-NEXT: vmerge.vim v9, v8, -1, v0 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmv.v.i v12, 3 -; RV64V-NEXT: vmv1r.v v0, v10 +; RV64V-NEXT: vmv1r.v v0, v9 ; RV64V-NEXT: vmerge.vim v12, v12, 0, v0 -; RV64V-NEXT: vmv1r.v v0, v8 ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; RV64V-NEXT: vmerge.vim v10, v9, -1, v0 -; RV64V-NEXT: vmv.s.x v8, a1 -; RV64V-NEXT: vmv.v.v v0, v10 +; RV64V-NEXT: vmv.v.i v0, 12 +; RV64V-NEXT: vmerge.vim v9, v8, -1, v0 +; RV64V-NEXT: li a1, 48 +; RV64V-NEXT: vmv.v.v v0, v9 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmerge.vim v12, v12, 1, v0 -; RV64V-NEXT: vmv1r.v v0, v8 +; RV64V-NEXT: vmv.s.x v0, a1 ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; RV64V-NEXT: vmerge.vim v8, v9, -1, v0 +; RV64V-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64V-NEXT: vmv.v.v v0, v8 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmerge.vim v8, v12, 2, v0 @@ -1031,27 +1030,27 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, ; RV64ZVE32: # 
%bb.0: ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.i v0, 15 -; RV64ZVE32-NEXT: vmv.v.i v9, 0 +; RV64ZVE32-NEXT: vmv.v.i v8, 0 ; RV64ZVE32-NEXT: li a0, 512 ; RV64ZVE32-NEXT: li a1, 240 -; RV64ZVE32-NEXT: vmv.s.x v8, a1 -; RV64ZVE32-NEXT: li a1, 15 -; RV64ZVE32-NEXT: vmerge.vim v10, v9, -1, v0 +; RV64ZVE32-NEXT: vmerge.vim v9, v8, -1, v0 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmv.v.i v12, 3 -; RV64ZVE32-NEXT: slli a1, a1, 8 -; RV64ZVE32-NEXT: vmv1r.v v0, v10 +; RV64ZVE32-NEXT: vmv1r.v v0, v9 ; RV64ZVE32-NEXT: vmerge.vim v12, v12, 0, v0 -; RV64ZVE32-NEXT: vmv1r.v v0, v8 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 +; RV64ZVE32-NEXT: li a1, 15 ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma -; RV64ZVE32-NEXT: vmerge.vim v10, v9, -1, v0 -; RV64ZVE32-NEXT: vmv.s.x v8, a1 -; RV64ZVE32-NEXT: vmv.v.v v0, v10 +; RV64ZVE32-NEXT: vmerge.vim v9, v8, -1, v0 +; RV64ZVE32-NEXT: slli a1, a1, 8 +; RV64ZVE32-NEXT: vmv.v.v v0, v9 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmerge.vim v12, v12, 1, v0 -; RV64ZVE32-NEXT: vmv1r.v v0, v8 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma -; RV64ZVE32-NEXT: vmerge.vim v8, v9, -1, v0 +; RV64ZVE32-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64ZVE32-NEXT: vmv.v.v v0, v8 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmerge.vim v8, v12, 2, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index f307ebb422c6c..cd73dbadb2d03 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -105,11 +105,10 @@ define <4 x i16> @vrgather_shuffle_vx_v4i16(<4 x i16> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmv.v.i v0, 2 -; CHECK-NEXT: vmv.v.i v9, 3 ; CHECK-NEXT: vslidedown.vi v8, v8, 2, v0.t -; CHECK-NEXT: vmv.v.i v10, 5 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmv.v.i v0, 3 +; CHECK-NEXT: vmv.v.i v9, 5 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x i16> %x, <4 x i16> , <4 x i32> ret <4 x i16> %s @@ -971,13 +970,12 @@ define <8 x i32> @shuffle_repeat3_singlesrc_e32(<8 x i32> %v) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v0, 7 -; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vmv.v.i v9, 1 ; CHECK-NEXT: li a0, 192 -; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vmerge.vim v9, v9, 0, v0 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 -; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 2, v0 +; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vslidedown.vx v10, v9, a0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 30751f8ea706b..39fd70beb9ee2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1046,44 +1046,45 @@ define void @mulhu_v16i8(ptr %x) { ; CHECK-LABEL: mulhu_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: lui a1, 3 -; CHECK-NEXT: 
vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: lui a2, %hi(.LCPI65_0) ; CHECK-NEXT: addi a2, a2, %lo(.LCPI65_0) -; CHECK-NEXT: vle8.v v11, (a2) -; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: vle8.v v10, (a2) +; CHECK-NEXT: li a2, -128 ; CHECK-NEXT: addi a1, a1, -2044 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: addi a1, a2, 32 -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: li a1, -128 +; CHECK-NEXT: lui a1, 1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vxm v12, v10, a1, v0 -; CHECK-NEXT: li a1, 513 -; CHECK-NEXT: vmv.v.i v13, 4 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vxm v11, v9, a2, v0 +; CHECK-NEXT: addi a2, a1, 32 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 -; CHECK-NEXT: addi a1, a2, 78 +; CHECK-NEXT: vmv.s.x v0, a2 +; CHECK-NEXT: li a2, 513 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v12, 4 +; CHECK-NEXT: addi a1, a1, 78 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a2 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v10, v13, 1, v0 +; CHECK-NEXT: vsrl.vv v9, v8, v9 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vsrl.vv v8, v9, v8 -; CHECK-NEXT: vmulhu.vv v8, v8, v11 -; CHECK-NEXT: vmerge.vim v10, v10, 3, v0 +; CHECK-NEXT: vmulhu.vv v9, v9, v10 +; CHECK-NEXT: vmerge.vim v10, v12, 3, v0 ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: addi a1, a1, 304 -; CHECK-NEXT: vsub.vv v9, v9, v8 -; CHECK-NEXT: vmulhu.vv v9, v9, v12 +; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v11 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v9, v8 +; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vmerge.vim v9, v10, 2, v0 ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) @@ -3153,48 +3154,49 @@ define void @mulhu_v32i8(ptr %x) { ; CHECK-LABEL: mulhu_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: lui a2, 163907 -; CHECK-NEXT: addi a2, a2, -2044 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v0, a2 -; CHECK-NEXT: lui a2, 66049 -; CHECK-NEXT: addi a2, a2, 32 -; CHECK-NEXT: vmv.s.x v8, a2 -; CHECK-NEXT: li a2, -128 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: lui a2, %hi(.LCPI181_0) +; CHECK-NEXT: addi a2, a2, %lo(.LCPI181_0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle8.v v8, (a2) +; CHECK-NEXT: lui a1, 163907 +; CHECK-NEXT: addi a1, a1, -2044 +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: li a1, -128 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmerge.vxm v12, v10, a2, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI181_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI181_0) -; CHECK-NEXT: vle8.v v14, (a0) -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vle8.v v10, (a1) +; CHECK-NEXT: vmerge.vxm v12, v10, a1, v0 +; CHECK-NEXT: lui a1, 66049 +; CHECK-NEXT: addi a1, a1, 32 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: lui a1, 8208 +; CHECK-NEXT: vle8.v v14, (a0) ; CHECK-NEXT: addi a1, a1, 513 -; CHECK-NEXT: vsrl.vv v8, v14, v8 -; CHECK-NEXT: vmulhu.vv v10, v8, v10 +; 
CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: lui a1, 66785 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v16, 4 ; CHECK-NEXT: addi a1, a1, 78 -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: lui a1, 529160 +; CHECK-NEXT: vsrl.vv v10, v14, v10 +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vsub.vv v14, v14, v10 -; CHECK-NEXT: vmulhu.vv v12, v14, v12 -; CHECK-NEXT: vmv.v.i v14, 4 +; CHECK-NEXT: vmulhu.vv v8, v10, v8 +; CHECK-NEXT: vmerge.vim v10, v16, 3, v0 +; CHECK-NEXT: lui a1, 529160 ; CHECK-NEXT: addi a1, a1, 304 -; CHECK-NEXT: vmerge.vim v14, v14, 1, v0 +; CHECK-NEXT: vsub.vv v14, v14, v8 +; CHECK-NEXT: vmulhu.vv v12, v14, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv.s.x v9, a1 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v14, v14, 3, v0 -; CHECK-NEXT: vadd.vv v10, v12, v10 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v14, 2, v0 -; CHECK-NEXT: vsrl.vv v8, v10, v8 +; CHECK-NEXT: vadd.vv v8, v12, v8 +; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 +; CHECK-NEXT: vsrl.vv v8, v8, v10 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x @@ -3215,27 +3217,28 @@ define void @mulhu_v16i16(ptr %x) { ; RV32-NEXT: vmerge.vxm v12, v8, a1, v0 ; RV32-NEXT: lui a1, 4 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v14, 0 +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: addi a1, a1, 64 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vmv.s.x v8, a1 ; RV32-NEXT: lui a1, 2 ; RV32-NEXT: addi a1, a1, 289 -; RV32-NEXT: vmv.s.x v9, a1 +; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: lui a1, %hi(.LCPI182_0) ; RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v15, 3 -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v14, v14, 1, v0 -; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmerge.vim v9, v15, 2, v0 +; RV32-NEXT: vmv.v.i v14, 3 +; RV32-NEXT: vmerge.vim v14, v14, 2, v0 ; RV32-NEXT: vle16.v v16, (a1) ; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v8, v9, 1, v0 +; RV32-NEXT: vmerge.vim v8, v14, 1, v0 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vsext.vf2 v18, v14 -; RV32-NEXT: vsrl.vv v14, v10, v18 +; RV32-NEXT: vsext.vf2 v14, v9 +; RV32-NEXT: vsrl.vv v14, v10, v14 ; RV32-NEXT: vmulhu.vv v14, v14, v16 ; RV32-NEXT: vsub.vv v10, v10, v14 ; RV32-NEXT: vmulhu.vv v10, v10, v12 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 7cc8c0c3f2d89..be6d84fb97700 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -183,383 +183,343 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 88 +; RV32-NEXT: li a3, 96 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: 
.cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 88 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xe0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 96 * vlenb ; RV32-NEXT: addi a4, a1, 128 ; RV32-NEXT: addi a5, a1, 256 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: lui a3, 12 -; RV32-NEXT: lui a6, 12291 -; RV32-NEXT: lui a7, %hi(.LCPI8_0) -; RV32-NEXT: addi a7, a7, %lo(.LCPI8_0) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v16, (a5) ; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li t0, 48 -; RV32-NEXT: mul a5, a5, t0 +; RV32-NEXT: li a6, 80 +; RV32-NEXT: mul a5, a5, a6 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vmv.s.x v3, a3 +; RV32-NEXT: lui a5, 12291 +; RV32-NEXT: vmv.s.x v7, a3 ; RV32-NEXT: vle32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a5, 72 -; RV32-NEXT: mul a1, a1, a5 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v8, (a4) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 80 -; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: li a6, 88 +; RV32-NEXT: mul a1, a1, a6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a6, a6, 3 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vle16.v v6, (a7) -; RV32-NEXT: vmv.s.x v2, a6 ; RV32-NEXT: vslideup.vi v8, v16, 4 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 -; RV32-NEXT: vmv1r.v v0, v3 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vslideup.vi v8, v24, 10, v0.t +; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 56 -; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: li a6, 76 +; RV32-NEXT: mul a1, a1, a6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vle32.v v8, (a4) +; RV32-NEXT: addi a5, a5, 3 +; RV32-NEXT: vmv.s.x v0, a5 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 72 +; RV32-NEXT: li a4, 80 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vslideup.vi v28, v16, 2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 80 +; RV32-NEXT: li a4, 88 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v16, v16, v8, v0 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v8, v16, v6 +; RV32-NEXT: vmerge.vvm v16, v8, v16, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; 
RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: slli a1, a1, 6 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; RV32-NEXT: vslideup.vi v8, v16, 2 -; RV32-NEXT: vmv1r.v v0, v3 -; RV32-NEXT: vslideup.vi v8, v24, 8, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; RV32-NEXT: vslideup.vi v28, v16, 8, v0.t ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 60 +; RV32-NEXT: li a4, 72 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a1, 49164 -; RV32-NEXT: lui a4, %hi(.LCPI8_1) -; RV32-NEXT: addi a4, a4, %lo(.LCPI8_1) -; RV32-NEXT: lui a5, 196656 -; RV32-NEXT: lui a6, %hi(.LCPI8_2) -; RV32-NEXT: addi a6, a6, %lo(.LCPI8_2) -; RV32-NEXT: addi a1, a1, 12 -; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vle16.v v8, (a4) +; RV32-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: lui a1, %hi(.LCPI8_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI8_0) +; RV32-NEXT: lui a4, 49164 +; RV32-NEXT: lui a5, %hi(.LCPI8_1) +; RV32-NEXT: addi a5, a5, %lo(.LCPI8_1) +; RV32-NEXT: vle16.v v28, (a1) +; RV32-NEXT: addi a4, a4, 12 +; RV32-NEXT: vle16.v v30, (a5) +; RV32-NEXT: vmv.s.x v24, a4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 24 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv.s.x v8, a5 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vrgatherei16.vv v16, v0, v28 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 12 +; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vle16.v v8, (a6) +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vmv8r.v v16, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 20 +; RV32-NEXT: li a4, 56 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 80 +; RV32-NEXT: li a4, 88 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 72 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl2r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v0, v8, v24 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: 
vs8r.v v0, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 12 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v0, v8, v30 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 80 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill +; RV32-NEXT: lui a5, 196656 +; RV32-NEXT: lui a1, %hi(.LCPI8_2) +; RV32-NEXT: addi a1, a1, %lo(.LCPI8_2) +; RV32-NEXT: lui a6, 3 +; RV32-NEXT: lui a7, 786624 +; RV32-NEXT: lui t0, 768 +; RV32-NEXT: li a4, 48 +; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: vmv.s.x v0, a5 +; RV32-NEXT: addi a6, a6, 3 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li t1, 88 +; RV32-NEXT: mul a5, a5, t1 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 20 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl2r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v8, v24 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a1, a1, a4 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: lui a1, 3 -; RV32-NEXT: lui a4, 786624 -; RV32-NEXT: lui a5, 768 -; RV32-NEXT: li a6, 48 -; RV32-NEXT: lui a7, 3073 -; RV32-NEXT: addi a1, a1, 3 -; RV32-NEXT: addi a4, a4, 192 -; RV32-NEXT: addi a3, a3, 12 -; RV32-NEXT: addi a5, a5, 768 -; RV32-NEXT: addi a7, a7, -1024 -; RV32-NEXT: vmv.s.x v8, a6 +; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 3 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v0, a6 +; RV32-NEXT: addi a5, a7, 192 ; RV32-NEXT: csrr a6, vlenb -; RV32-NEXT: li t0, 20 -; RV32-NEXT: mul a6, a6, t0 +; RV32-NEXT: slli a6, a6, 6 ; RV32-NEXT: add a6, sp, a6 ; RV32-NEXT: addi a6, a6, 16 -; RV32-NEXT: vs1r.v v8, (a6) # Unknown-size Folded Spill -; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vmv.s.x v16, a4 -; RV32-NEXT: vmv.s.x v3, a3 -; RV32-NEXT: vmv.s.x v1, a5 -; RV32-NEXT: vmv.s.x v2, a7 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 48 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v4, v8, v24, v0 -; RV32-NEXT: vmv1r.v v0, v16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 80 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 72 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a6) # Unknown-size Folded 
Reload +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: li a7, 80 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vmerge.vvm v4, v16, v24, v0 +; RV32-NEXT: vmv.s.x v0, a5 +; RV32-NEXT: addi a3, a3, 12 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 88 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v16, v16, v8, v0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 12 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v3 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 5 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: addi a3, t0, 768 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vmerge.vvm v20, v16, v24, v0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 28 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs4r.v v20, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v0, a3 +; RV32-NEXT: lui a3, 3073 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 56 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: li a6, 88 +; RV32-NEXT: mul a5, a5, a6 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vmerge.vvm v8, v24, v8, v0 +; RV32-NEXT: csrr a5, vlenb +; RV32-NEXT: slli a5, a5, 4 +; RV32-NEXT: add a5, sp, a5 +; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v12, v8, v24, v0 +; RV32-NEXT: vle16.v v2, (a1) +; RV32-NEXT: addi a1, a3, -1024 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 80 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vmerge.vvm v20, v16, v8, v0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs4r.v v20, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; 
RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 80 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 72 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v16, v16, v8, v0 +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV32-NEXT: vrgatherei16.vv v24, v8, v2 ; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 20 +; RV32-NEXT: li a3, 88 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v12, v8, v24, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 20 +; RV32-NEXT: li a3, 56 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v2 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 80 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 72 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v16, v16, v8, v0 +; RV32-NEXT: vmerge.vvm v8, v24, v8, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 80 +; RV32-NEXT: li a2, 88 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI8_3) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_3) ; RV32-NEXT: li a2, 192 ; RV32-NEXT: vmv.s.x v0, a2 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v3, (a1) +; RV32-NEXT: vle16.v v12, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a2, 80 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v8, v8, v24, v0 +; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 72 +; RV32-NEXT: li a2, 80 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: 
li a2, 56 +; RV32-NEXT: li a2, 76 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v12, v8 +; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a2, 76 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 60 +; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 72 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vmv.v.v v8, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 60 +; RV32-NEXT: li a2, 72 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v4, v3 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vrgatherei16.vv v0, v4, v12 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v16, v8 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv.v.v v0, v8 ; RV32-NEXT: lui a1, %hi(.LCPI8_4) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_4) ; RV32-NEXT: lui a2, %hi(.LCPI8_5) @@ -567,101 +527,109 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV32-NEXT: vle16.v v24, (a1) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v28, (a2) +; RV32-NEXT: vle16.v v5, (a2) ; RV32-NEXT: lui a1, %hi(.LCPI8_6) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_6) ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle16.v v30, (a1) +; RV32-NEXT: vle16.v v6, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 12 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v0, v24 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v16, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a2, 28 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v24, v12, v28 +; RV32-NEXT: vrgatherei16.vv v24, v12, v5 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v24, v8 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size 
Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v0, v8, v30 +; RV32-NEXT: vrgatherei16.vv v8, v16, v6 ; RV32-NEXT: lui a1, %hi(.LCPI8_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_7) ; RV32-NEXT: lui a2, %hi(.LCPI8_8) ; RV32-NEXT: addi a2, a2, %lo(.LCPI8_8) ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v8, (a1) +; RV32-NEXT: vle16.v v12, (a1) ; RV32-NEXT: lui a1, %hi(.LCPI8_9) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_9) ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v10, (a2) +; RV32-NEXT: vle16.v v28, (a2) ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vle16.v v9, (a1) +; RV32-NEXT: vle16.v v30, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 20 +; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v12, v28, v8 +; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v4, v16, v12 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v12, v0 +; RV32-NEXT: vmv.v.v v4, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 80 +; RV32-NEXT: li a2, 88 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v0, v10 +; RV32-NEXT: vrgatherei16.vv v8, v16, v28 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 72 +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 80 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v28, v4, v9 +; RV32-NEXT: vrgatherei16.vv v8, v16, v30 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v28, v16 +; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: addi a1, a0, 320 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vse32.v v28, (a1) +; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: vse32.v v12, (a1) +; RV32-NEXT: vse32.v v4, (a1) ; RV32-NEXT: addi a1, a0, 192 ; RV32-NEXT: vse32.v v24, (a1) ; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 6 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: vse32.v v0, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 60 +; RV32-NEXT: li a3, 72 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a2, 
76 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 88 +; RV32-NEXT: li a1, 96 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 @@ -681,7 +649,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a1) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 85 +; RV64-NEXT: li a3, 77 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -690,93 +658,73 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a3, a1, 256 ; RV64-NEXT: li a4, 128 ; RV64-NEXT: lui a1, 1 -; RV64-NEXT: vle64.v v8, (a2) -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a5, 77 -; RV64-NEXT: mul a2, a2, a5 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: lui a2, %hi(.LCPI8_0) -; RV64-NEXT: addi a2, a2, %lo(.LCPI8_0) -; RV64-NEXT: vle64.v v8, (a3) +; RV64-NEXT: vle64.v v16, (a3) +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a5, 85 +; RV64-NEXT: mul a3, a3, a5 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV64-NEXT: lui a3, %hi(.LCPI8_0) +; RV64-NEXT: addi a3, a3, %lo(.LCPI8_0) ; RV64-NEXT: vmv.s.x v0, a4 -; RV64-NEXT: addi a3, a1, 65 -; RV64-NEXT: vle16.v v16, (a2) -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a4, 53 -; RV64-NEXT: mul a2, a2, a4 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v6, a3 +; RV64-NEXT: addi a4, a1, 65 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vslideup.vi v20, v8, 2 +; RV64-NEXT: vslideup.vi v8, v16, 2 ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v8, 8 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 69 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vslidedown.vi v16, v16, 8 +; RV64-NEXT: vmv1r.v v20, v0 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vslideup.vi v20, v24, 5, v0.t -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 6 -; RV64-NEXT: add a2, a3, a2 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v20, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vslideup.vi v8, v16, 5, v0.t +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 49 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 16 +; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV64-NEXT: csrr a5, vlenb +; RV64-NEXT: li a6, 73 +; RV64-NEXT: mul a5, a5, a6 +; RV64-NEXT: add a5, sp, a5 +; RV64-NEXT: addi a5, a5, 16 +; RV64-NEXT: vs4r.v v8, (a5) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 85 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: li a5, 61 +; RV64-NEXT: mul a2, a2, a5 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vle16.v v22, (a3) +; 
RV64-NEXT: vmv.s.x v0, a4 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 77 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vmerge.vvm v24, v16, v24, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl2r.v v14, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v0, v24, v14 +; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vmerge.vvm v24, v8, v24, v0 +; RV64-NEXT: vrgatherei16.vv v0, v24, v22 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 41 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmv4r.v v24, v8 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 57 +; RV64-NEXT: li a3, 85 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vslideup.vi v8, v24, 1 -; RV64-NEXT: vmv1r.v v1, v12 -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 69 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vmv1r.v v0, v20 +; RV64-NEXT: vmv1r.v v3, v20 ; RV64-NEXT: vslideup.vi v8, v16, 4, v0.t ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 49 +; RV64-NEXT: li a3, 69 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -785,196 +733,199 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: lui a3, 4 ; RV64-NEXT: li a4, 32 ; RV64-NEXT: addi a2, a2, 130 -; RV64-NEXT: addi a3, a3, 260 -; RV64-NEXT: vmv.s.x v2, a4 ; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: vmv.s.x v3, a3 -; RV64-NEXT: vmv4r.v v4, v24 +; RV64-NEXT: addi a2, a3, 260 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a5, 77 +; RV64-NEXT: mul a3, a3, a5 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a5, 61 +; RV64-NEXT: mul a3, a3, a5 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vmerge.vvm v24, v8, v16, v0 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a5, 12 +; RV64-NEXT: mul a3, a3, a5 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV64-NEXT: vmv.s.x v0, a2 +; RV64-NEXT: vmv.s.x v2, a4 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 85 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vmerge.vvm v16, v24, v8, v0 +; RV64-NEXT: vmv4r.v v4, v24 +; RV64-NEXT: vmerge.vvm v16, v8, v16, v0 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: li 
a3, 29 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v3 -; RV64-NEXT: vmerge.vvm v8, v24, v8, v0 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v2 +; RV64-NEXT: vmv4r.v v28, v24 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 57 +; RV64-NEXT: li a3, 85 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vslideup.vi v4, v8, 5, v0.t -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vslideup.vi v28, v8, 5, v0.t +; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 69 +; RV64-NEXT: li a3, 49 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v4, v24, 4, v0.t +; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v28, v16, 4, v0.t ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 37 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v4, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vslidedown.vi v20, v8, 1 +; RV64-NEXT: vs4r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vslidedown.vi v12, v8, 1 ; RV64-NEXT: vmv1r.v v0, v2 -; RV64-NEXT: vslideup.vi v20, v8, 4, v0.t -; RV64-NEXT: vmv1r.v v0, v1 -; RV64-NEXT: vrgather.vi v20, v24, 5, v0.t +; RV64-NEXT: vslideup.vi v12, v8, 4, v0.t +; RV64-NEXT: vmv1r.v v0, v3 +; RV64-NEXT: vrgather.vi v12, v16, 5, v0.t ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 +; RV64-NEXT: li a3, 57 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v20, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a2) # Unknown-size Folded Spill ; RV64-NEXT: lui a2, 8 ; RV64-NEXT: addi a2, a2, 520 ; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: vslideup.vi v8, v24, 6 +; RV64-NEXT: vslideup.vi v20, v16, 6 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 85 +; RV64-NEXT: li a3, 61 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 29 +; RV64-NEXT: li a3, 77 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vmerge.vvm v8, v24, v8, v0 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 69 +; RV64-NEXT: li a3, 21 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vslideup.vi v8, 
v16, 1, v0.t +; RV64-NEXT: vslideup.vi v20, v16, 1, v0.t ; RV64-NEXT: lui a2, %hi(.LCPI8_1) ; RV64-NEXT: addi a2, a2, %lo(.LCPI8_1) -; RV64-NEXT: lui a3, %hi(.LCPI8_2) -; RV64-NEXT: addi a3, a3, %lo(.LCPI8_2) -; RV64-NEXT: li a4, 192 -; RV64-NEXT: vmv.s.x v0, a4 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: li a5, 28 -; RV64-NEXT: mul a4, a4, a5 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; RV64-NEXT: li a3, 192 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vle16.v v28, (a2) -; RV64-NEXT: vle16.v v30, (a3) +; RV64-NEXT: vle16.v v12, (a2) +; RV64-NEXT: vmv.s.x v0, a3 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 57 +; RV64-NEXT: li a3, 20 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vrgather.vi v24, v16, 2 -; RV64-NEXT: vmerge.vvm v8, v24, v8, v0 +; RV64-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 24 +; RV64-NEXT: li a3, 85 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vrgather.vi v8, v24, 2 +; RV64-NEXT: vmerge.vvm v8, v8, v20, v0 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v8, v16, v28 -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v8, (a2) # Unknown-size Folded Spill ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: li a3, 12 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v8, v30 +; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vrgatherei16.vv v0, v24, v12 +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; RV64-NEXT: lui a2, %hi(.LCPI8_2) +; RV64-NEXT: addi a2, a2, %lo(.LCPI8_2) +; RV64-NEXT: li a3, 1040 +; RV64-NEXT: vmv.s.x v0, a3 +; RV64-NEXT: addi a1, a1, -2016 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 61 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 77 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV64-NEXT: vmerge.vvm v24, v16, v8, v0 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 12 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vle16.v v6, (a2) +; RV64-NEXT: li a1, 64 +; RV64-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: li a3, 77 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v16, (a2) # 
Unknown-size Folded Spill -; RV64-NEXT: li a2, 1040 -; RV64-NEXT: li a3, 64 -; RV64-NEXT: addi a1, a1, -2016 -; RV64-NEXT: vmv.s.x v0, a2 -; RV64-NEXT: vmv.s.x v2, a3 -; RV64-NEXT: vmv.s.x v1, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 85 +; RV64-NEXT: li a2, 29 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 +; RV64-NEXT: vrgatherei16.vv v8, v24, v6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v1 -; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 85 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 49 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv4r.v v20, v8 -; RV64-NEXT: vmv1r.v v0, v2 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv4r.v v8, v16 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vslideup.vi v20, v8, 5, v0.t +; RV64-NEXT: vslideup.vi v8, v16, 5, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 @@ -982,102 +933,103 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 6 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 73 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v24 +; RV64-NEXT: vmv.v.v v12, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 6 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 73 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 49 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v12, v24 +; RV64-NEXT: vmv.v.v v16, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 49 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, %hi(.LCPI8_3) ; RV64-NEXT: addi a1, a1, %lo(.LCPI8_3) ; 
RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vle16.v v18, (a1) +; RV64-NEXT: vle16.v v20, (a1) ; RV64-NEXT: lui a1, %hi(.LCPI8_4) ; RV64-NEXT: addi a1, a1, %lo(.LCPI8_4) -; RV64-NEXT: vle16.v v16, (a1) +; RV64-NEXT: vle16.v v12, (a1) ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 61 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v12, v24 +; RV64-NEXT: vmv.v.v v16, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 +; RV64-NEXT: li a2, 21 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v24, v0, v18 +; RV64-NEXT: vrgatherei16.vv v24, v0, v20 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v28, v24 +; RV64-NEXT: vmv.v.v v20, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 57 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v24, v0, v16 +; RV64-NEXT: vrgatherei16.vv v24, v0, v12 ; RV64-NEXT: lui a1, %hi(.LCPI8_5) ; RV64-NEXT: addi a1, a1, %lo(.LCPI8_5) -; RV64-NEXT: vle16.v v16, (a1) +; RV64-NEXT: vle16.v v20, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs2r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs2r.v v20, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v16, v24 +; RV64-NEXT: vmv.v.v v20, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 57 +; RV64-NEXT: li a2, 85 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -1085,54 +1037,54 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vrgather.vi v24, v0, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 28 +; RV64-NEXT: li a2, 20 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; 
RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v20, v24, v20, v0 +; RV64-NEXT: vmerge.vvm v8, v24, v8, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 85 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl2r.v v12, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v24, v0, v8 +; RV64-NEXT: vrgatherei16.vv v24, v0, v12 ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v20, v24 +; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: addi a1, a0, 256 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vse64.v v16, (a1) -; RV64-NEXT: addi a1, a0, 320 ; RV64-NEXT: vse64.v v20, (a1) +; RV64-NEXT: addi a1, a0, 320 +; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 192 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 +; RV64-NEXT: li a3, 57 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl4r.v v16, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v16, (a1) +; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: vse64.v v12, (a1) +; RV64-NEXT: vse64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 64 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 49 +; RV64-NEXT: li a3, 69 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl4r.v v12, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v12, (a1) +; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 6 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 73 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 232a364e87f0e..837a8891dfeb9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -13741,14 +13741,13 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64V-NEXT: vsext.vf8 v16, v8 ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64V-NEXT: vslidedown.vi v12, v10, 16 -; RV64V-NEXT: vslidedown.vi v14, v8, 16 -; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64V-NEXT: vslidedown.vi v8, v0, 2 +; RV64V-NEXT: vslidedown.vi v8, v8, 16 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t -; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64V-NEXT: vsext.vf8 v16, v14 -; RV64V-NEXT: vmv1r.v v0, v8 +; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64V-NEXT: vslidedown.vi v0, v0, 2 +; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64V-NEXT: vsext.vf8 v16, v8 ; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: li a0, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll index 
f72b08a405246..dbbec96445e3e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll @@ -278,12 +278,12 @@ define <64 x float> @masked_load_v64f32(ptr %a, <64 x i1> %mask) { ; CHECK-LABEL: masked_load_v64f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v0, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 4 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <64 x float> @llvm.masked.load.v64f32(ptr %a, i32 8, <64 x i1> %mask, <64 x float> undef) @@ -294,12 +294,12 @@ define <128 x bfloat> @masked_load_v128bf16(ptr %a, <128 x i1> %mask) { ; CHECK-LABEL: masked_load_v128bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v0, 8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <128 x bfloat> @llvm.masked.load.v128bf16(ptr %a, i32 8, <128 x i1> %mask, <128 x bfloat> undef) @@ -310,12 +310,12 @@ define <128 x half> @masked_load_v128f16(ptr %a, <128 x i1> %mask) { ; CHECK-LABEL: masked_load_v128f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v0, 8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <128 x half> @llvm.masked.load.v128f16(ptr %a, i32 8, <128 x i1> %mask, <128 x half> undef) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll index 69903d77084bf..bca3544d8f032 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll @@ -276,12 +276,12 @@ define <64 x i32> @masked_load_v64i32(ptr %a, <64 x i1> %mask) { ; CHECK-LABEL: masked_load_v64i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v0, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 4 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <64 x i32> @llvm.masked.load.v64i32(ptr %a, i32 8, <64 x i1> %mask, <64 x i32> undef) @@ -303,12 +303,12 @@ define <128 x i16> @masked_load_v128i16(ptr %a, <128 x i1> %mask) { ; CHECK-LABEL: masked_load_v128i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v0, 8 ; CHECK-NEXT: 
vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <128 x i16> @llvm.masked.load.v128i16(ptr %a, i32 8, <128 x i1> %mask, <128 x i16> undef) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll index ed6ec4d5659b1..f7e311d06c03a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll @@ -278,12 +278,12 @@ define void @masked_store_v64f32(<64 x float> %val, ptr %a, <64 x i1> %mask) { ; CHECK-LABEL: masked_store_v64f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 4 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.masked.store.v64f32.p0(<64 x float> %val, ptr %a, i32 8, <64 x i1> %mask) @@ -294,12 +294,12 @@ define void @masked_store_v128bf16(<128 x bfloat> %val, ptr %a, <128 x i1> %mask ; CHECK-LABEL: masked_store_v128bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.masked.store.v128bf16.p0(<128 x bfloat> %val, ptr %a, i32 8, <128 x i1> %mask) @@ -310,12 +310,12 @@ define void @masked_store_v128f16(<128 x half> %val, ptr %a, <128 x i1> %mask) { ; CHECK-LABEL: masked_store_v128f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.masked.store.v128f16.p0(<128 x half> %val, ptr %a, i32 8, <128 x i1> %mask) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll index c3b10db115bae..6914a86726af4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -276,12 +276,12 @@ define void @masked_store_v64i32(<64 x i32> %val, ptr %a, <64 x i1> %mask) { ; CHECK-LABEL: masked_store_v64i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v8, (a0), v0.t +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, 
ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 4 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.masked.store.v64i32.p0(<64 x i32> %val, ptr %a, i32 8, <64 x i1> %mask) @@ -303,12 +303,12 @@ define void @masked_store_v128i16(<128 x i16> %val, ptr %a, <128 x i1> %mask) { ; CHECK-LABEL: masked_store_v128i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v8, (a0), v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.masked.store.v128i16.p0(<128 x i16> %val, ptr %a, i32 8, <128 x i1> %mask) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll index acb1802181540..8d11ecacfa41e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll @@ -234,22 +234,24 @@ define <16 x i32> @v16i32_v4i32(<4 x i32>) { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: lui a0, 2 -; CHECK-NEXT: vmv.v.i v10, 3 +; CHECK-NEXT: vmv.v.i v8, 3 ; CHECK-NEXT: addi a1, a0, 265 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: lui a1, 4 ; CHECK-NEXT: addi a1, a1, 548 -; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: addi a0, a0, -1856 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 -; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vsext.vf2 v10, v8 @@ -279,20 +281,22 @@ define <32 x i32> @v32i32_v4i32(<4 x i32>) { ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: lui a1, 270865 +; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 3 ; CHECK-NEXT: addi a1, a1, 548 -; CHECK-NEXT: vmv.s.x v9, a1 +; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: lui a1, 100550 ; CHECK-NEXT: addi a1, a1, 64 -; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmerge.vim v10, v10, 0, v0 +; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v12, 3 ; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: vmerge.vim v12, v12, 2, v0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v12, v12, 0, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v12, 
1, v0 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vsext.vf2 v12, v10 ; CHECK-NEXT: vslidedown.vx v20, v12, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll index 59ddc021f4999..5e6d7c1eedb76 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll @@ -98,17 +98,16 @@ define void @deinterleave5_0_i8(ptr %in, ptr %out) { ; CHECK-LABEL: deinterleave5_0_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: li a0, 33 ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v9, 8 -; CHECK-NEXT: vmv.v.i v8, 10 +; CHECK-NEXT: vslidedown.vi v9, v8, 8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vslidedown.vi v9, v9, 4, v0.t -; CHECK-NEXT: vse8.v v9, (a1) +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vmv.v.i v0, 10 +; CHECK-NEXT: vslidedown.vi v8, v8, 4, v0.t +; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: ret entry: %0 = load <16 x i8>, ptr %in, align 1 @@ -146,15 +145,15 @@ define void @deinterleave6_0_i8(ptr %in, ptr %out) { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vmv.v.v v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 5, v0.t +; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v8, 8 -; CHECK-NEXT: vmv1r.v v10, v8 -; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: vslidedown.vi v8, v8, 8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vrgather.vi v10, v9, 4, v0.t -; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: vrgather.vi v9, v8, 4, v0.t +; CHECK-NEXT: vse8.v v9, (a1) ; CHECK-NEXT: ret entry: %0 = load <16 x i8>, ptr %in, align 1 @@ -192,15 +191,15 @@ define void @deinterleave7_0_i8(ptr %in, ptr %out) { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.v.i v0, 2 +; CHECK-NEXT: vmv.v.v v9, v8 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v9, v8, 6, v0.t +; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v8, 8 -; CHECK-NEXT: vmv1r.v v10, v8 -; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: vslidedown.vi v8, v8, 8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vslidedown.vi v10, v10, 6, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vrgather.vi v10, v9, 6, v0.t -; CHECK-NEXT: vse8.v v10, (a1) +; CHECK-NEXT: vrgather.vi v9, v8, 6, v0.t +; CHECK-NEXT: vse8.v v9, (a1) ; CHECK-NEXT: ret entry: %0 = load <16 x i8>, ptr %in, align 1 @@ -280,17 +279,18 @@ define void @deinterleave7_0_i64(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v8, 8 -; CHECK-NEXT: vmv4r.v v12, v8 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 -; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: 
vmv4r.v v16, v8 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vslidedown.vi v12, v12, 6, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vrgather.vi v12, v16, 6, v0.t -; CHECK-NEXT: vse64.v v12, (a1) +; CHECK-NEXT: vslidedown.vi v16, v8, 6, v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; CHECK-NEXT: vrgather.vi v16, v8, 6, v0.t +; CHECK-NEXT: vse64.v v16, (a1) ; CHECK-NEXT: ret entry: %0 = load <16 x i64>, ptr %in @@ -324,18 +324,19 @@ define void @deinterleave7_0_i32_subvec(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v12, v8, 8 -; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 -; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; CHECK-NEXT: vslidedown.vi v12, v8, 6, v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 4 +; CHECK-NEXT: vsetivli zero, 8, e32, m4, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 8 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vslidedown.vi v10, v10, 6, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vrgather.vi v10, v12, 6, v0.t +; CHECK-NEXT: vrgather.vi v12, v8, 6, v0.t ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma -; CHECK-NEXT: vse32.v v10, (a1) +; CHECK-NEXT: vse32.v v12, (a1) ; CHECK-NEXT: ret entry: %0 = load <16 x i32>, ptr %in diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll index 2da18fbb8e41c..d06ce0a817f9e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll @@ -183,16 +183,15 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi a0, a0, 252 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmv.v.i v0, 1 -; CHECK-NEXT: vmv.v.i v8, 5 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vslidedown.vi v9, v9, 1, v0.t +; CHECK-NEXT: vslidedown.vi v8, v8, 1, v0.t +; CHECK-NEXT: vmv.v.i v0, 5 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v11, v11, v9, v0 +; CHECK-NEXT: vmerge.vvm v11, v11, v8, v0 ; CHECK-NEXT: addi a0, a1, 672 ; CHECK-NEXT: vs2r.v v10, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll index 0fbb139d5f461..7b9a30187f5e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll @@ -391,10 +391,10 @@ define void @vnsrl_0_i8_undef3(ptr %in, ptr %out) { ; CHECK-NEXT: vid.v v9 ; CHECK-NEXT: li a0, -32 ; CHECK-NEXT: vadd.vv v9, v9, v9 +; CHECK-NEXT: vadd.vi v9, v9, -8 ; CHECK-NEXT: vslidedown.vi v10, v8, 2 ; CHECK-NEXT: vslidedown.vi v10, v8, 3, v0.t ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vadd.vi v9, v9, -8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; 
CHECK-NEXT: vslidedown.vi v8, v8, 8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index 1f6513ae09d60..64f1819f0e329 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -298,11 +298,11 @@ define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> % ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 16 -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: addi a1, a1, -1 ; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index 05254e60b65b7..157b4c8d7007f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -456,49 +456,36 @@ define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size 
Folded Reload ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir index 2d49b4e4f493f..a967f86f5b930 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir +++ b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir @@ -18,7 +18,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 192 /* e8, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: renamable $v8 = PseudoVMERGE_VIM_M1 undef renamable $v8, killed renamable $v2, 1, $v0, 1, 3 /* e8 */, implicit $vl, implicit $vtype - ; CHECK-NEXT: $v0 = COPY killed renamable $v1, implicit $vtype + ; CHECK-NEXT: renamable $v0 = COPY $v1, implicit $vtype ; CHECK-NEXT: renamable $v9 = PseudoVMERGE_VIM_M1 undef renamable $v9, killed renamable $v3, 1, $v0, 1, 3 /* e8 */, implicit $vl, implicit $vtype ; CHECK-NEXT: renamable $v0 = PseudoVADD_VV_M1 undef renamable $v0, killed renamable $v8, killed renamable $v9, 1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: PseudoRET implicit $v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index 0fad09f27007c..72ea0be5da28d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -1221,12 +1221,12 @@ define void @mgather_nxv16i64( %ptrs0, %ptr ; RV32: # %bb.0: ; RV32-NEXT: vl8re64.v v24, (a0) ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: srli a2, a0, 3 ; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v7, v0, a2 +; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t -; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: vluxei32.v v24, (zero), v12, v0.t ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, a1, a0 @@ -1236,37 +1236,20 @@ define void @mgather_nxv16i64( %ptrs0, %ptr ; ; RV64-LABEL: mgather_nxv16i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: vmv8r.v v16, v8 ; RV64-NEXT: vl8re64.v v24, (a0) -; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v24, (zero), v8, v0.t ; RV64-NEXT: vl8re64.v v8, (a1) +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: srli a1, a0, 3 -; RV64-NEXT: vslidedown.vx v7, v0, a1 +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; RV64-NEXT: vluxei64.v v24, (zero), v16, v0.t -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vluxei64.v v8, (zero), v16, v0.t ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, a2, a0 ; RV64-NEXT: vs8r.v 
v8, (a0) ; RV64-NEXT: vs8r.v v24, (a2) -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 16 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %p0 = call @llvm.vector.insert.nxv8p0.nxv16p0( undef, %ptrs0, i64 0) %p1 = call @llvm.vector.insert.nxv8p0.nxv16p0( %p0, %ptrs1, i64 8) diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index 3cf7cc9cb5152..ef2085323823b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1887,15 +1887,15 @@ declare @llvm.vector.insert.nxv8p0.nxv16p0( %val0, %val1, %ptrs0, %ptrs1, %m) { ; RV32-LABEL: mscatter_nxv16f64: ; RV32: # %bb.0: +; RV32-NEXT: vl4re32.v v24, (a0) ; RV32-NEXT: vl4re32.v v28, (a1) -; RV32-NEXT: vl4re32.v v4, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: srli a0, a0, 3 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t ; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v24, v0, a0 +; RV32-NEXT: vslidedown.vx v0, v0, a0 ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV32-NEXT: vsoxei32.v v8, (zero), v4, v0.t -; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t ; RV32-NEXT: ret ; @@ -1904,38 +1904,26 @@ define void @mscatter_nxv16f64( %val0, %val0, %val0, %val0, %val1, ptr %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vl2r.v v4, (a1) +; RV32-NEXT: vl2r.v v6, (a1) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a1, a1, 3 -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v7, v0, a1 -; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vsext.vf4 v24, v4 +; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV32-NEXT: vsext.vf4 v24, v6 ; RV32-NEXT: vsll.vi v24, v24, 3 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t -; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a1 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret ; @@ -1994,10 +1972,10 @@ define void @mscatter_baseidx_nxv16i8_nxv16f64( %val0, %val0, %val0, %val1, ptr %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma -; RV32-NEXT: vmv8r.v v16, v8 -; RV32-NEXT: vl4re16.v v8, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: srli a1, a1, 3 -; RV32-NEXT: vslidedown.vx v7, v0, a1 +; RV32-NEXT: vl4re16.v v4, (a1) ; RV32-NEXT: li a1, 8 ; RV32-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; RV32-NEXT: vwmulsu.vx v24, v8, a1 +; RV32-NEXT: vwmulsu.vx v24, v4, a1 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: srli a1, a1, 3 +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; 
RV32-NEXT: vsoxei32.v v16, (a0), v24, v0.t -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsoxei32.v v8, (a0), v28, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 16 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64: @@ -2051,10 +2012,10 @@ define void @mscatter_baseidx_nxv16i16_nxv16f64( %val0, , <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) { ; CHECK-LABEL: vector_deinterleave_v16i1_v32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v9, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v0, 2 +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v10, v9, 0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vnsrl.wi v9, v10, 0 +; CHECK-NEXT: vnsrl.wi v9, v9, 8 ; CHECK-NEXT: vnsrl.wi v11, v8, 0 -; CHECK-NEXT: vnsrl.wi v10, v10, 8 ; CHECK-NEXT: vnsrl.wi v8, v8, 8 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vi v9, v11, 8 -; CHECK-NEXT: vslideup.vi v10, v8, 8 -; CHECK-NEXT: vmsne.vi v0, v9, 0 -; CHECK-NEXT: vmsne.vi v8, v10, 0 +; CHECK-NEXT: vslideup.vi v10, v11, 8 +; CHECK-NEXT: vslideup.vi v9, v8, 8 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vmsne.vi v8, v9, 0 ; CHECK-NEXT: ret %retval = call {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1> %vec) ret {<16 x i1>, <16 x i1>} %retval @@ -103,17 +103,16 @@ define {<4 x i64>, <4 x i64>} @vector_deinterleave_v4i64_v8i64(<8 x i64> %vec) { ; CHECK-NEXT: vslidedown.vi v12, v8, 1, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmerge.vvm v12, v12, v14, v0 -; CHECK-NEXT: vslidedown.vi v18, v8, 1 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v14, 4 +; CHECK-NEXT: vslidedown.vi v14, v8, 1 ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vslidedown.vi v18, v8, 2, v0.t +; CHECK-NEXT: vslidedown.vi v14, v8, 2, v0.t +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vmv2r.v v8, v16 -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vslideup.vi v8, v16, 1, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 -; CHECK-NEXT: vmerge.vvm v10, v18, v8, v0 +; CHECK-NEXT: vmerge.vvm v10, v14, v8, v0 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret %retval = call {<4 x i64>, <4 x i64>} @llvm.vector.deinterleave2.v8i64(<8 x i64> %vec) @@ -512,17 +511,16 @@ define {<4 x double>, <4 x double>} @vector_deinterleave_v4f64_v8f64(<8 x double ; CHECK-NEXT: vslidedown.vi v12, v8, 1, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmerge.vvm v12, v12, v14, v0 -; CHECK-NEXT: vslidedown.vi v18, v8, 1 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v14, 4 +; CHECK-NEXT: vslidedown.vi v14, v8, 1 ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vslidedown.vi v18, v8, 2, v0.t +; CHECK-NEXT: vslidedown.vi v14, v8, 2, v0.t 
+; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vmv2r.v v8, v16 -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vslideup.vi v8, v16, 1, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 -; CHECK-NEXT: vmerge.vvm v10, v18, v8, v0 +; CHECK-NEXT: vmerge.vvm v10, v14, v8, v0 ; CHECK-NEXT: vmv2r.v v8, v12 ; CHECK-NEXT: ret %retval = call {<4 x double>, <4 x double>} @llvm.vector.deinterleave2.v8f64(<8 x double> %vec) diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 304db932723f5..ff55c09a0707b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -10,15 +10,14 @@ define {, } @vector_deinterleave_nxv16i1_nxv ; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: vmerge.vim v12, v8, 1, v0 ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v0, a0 +; CHECK-NEXT: vslidedown.vx v0, v0, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v14, v8, 1, v0 ; CHECK-NEXT: vnsrl.wi v8, v12, 0 ; CHECK-NEXT: vnsrl.wi v10, v12, 8 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -453,24 +452,23 @@ define {, , } @vector_dein ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: vmerge.vim v16, v10, 1, v0 ; CHECK-NEXT: srli a1, a0, 2 ; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v0, a1 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v16, v10, 1, v0 +; CHECK-NEXT: vslidedown.vx v0, v0, a1 ; CHECK-NEXT: srli a0, a0, 1 -; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v9, v0, a0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmerge.vim v18, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v8, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmerge.vim v20, v10, 1, v0 -; CHECK-NEXT: vs8r.v v16, (a1) -; CHECK-NEXT: vlseg3e8.v v8, (a1) +; CHECK-NEXT: vs8r.v v16, (a0) +; CHECK-NEXT: vlseg3e8.v v8, (a0) ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vmsne.vi v8, v10, 0 ; CHECK-NEXT: vmsne.vi v9, v12, 0 @@ -576,40 +574,40 @@ define {, , , , , ,