From bfe54031d303c16650ec5f9a901686dd48628768 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 10 Mar 2025 13:43:28 -0700 Subject: [PATCH] [RISCV] Convert vsub.vx to vadd.vi if possible We'd already had this transform for the intrinsics, but hadn't added it for either fixed length or scalable vectors coming from normal IR. For the record, the fact we have three different sets of patterns here really is quite ugly. --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 7 + llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 1 + .../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 13 +- .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 15 +- llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll | 16 +- llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll | 580 ++--- llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll | 1204 ++++----- .../RISCV/rvv/fixed-vectors-cttz-vp.ll | 2272 ++++++++--------- .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll | 68 +- .../CodeGen/RISCV/rvv/fixed-vectors-int.ll | 12 +- .../RISCV/rvv/fold-binop-into-select.ll | 5 +- llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll | 20 +- .../RISCV/rvv/vscale-vw-web-simplification.ll | 12 +- llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll | 30 +- 14 files changed, 2020 insertions(+), 2235 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index f3cce950ed7b5..8aa684c56bde0 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3573,6 +3573,13 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) { /*Decrement=*/true); } +bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal) { + return selectVSplatImmHelper( + N, SplatVal, *CurDAG, *Subtarget, + [](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; }, + /*Decrement=*/false); +} + bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal) { return selectVSplatImmHelper( diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index 5048a80fdd18f..db09ad146b655 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -137,6 +137,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel { return selectVSplatUimm(N, Bits, Val); } bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal); + bool selectVSplatSimm5Plus1NoDec(SDValue N, SDValue &SplatVal); bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal); // Matches the splat of a value which can be extended or truncated, such that // only the bottom 8 bits are preserved. diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 2bd61883760e5..b2c5261ae6c2d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -877,9 +877,9 @@ foreach mti = AllMasks in // 11.1. Vector Single-Width Integer Add and Subtract defm : VPatBinarySDNode_VV_VX_VI; defm : VPatBinarySDNode_VV_VX; -// Handle VRSUB specially since it's the only integer binary op with reversed -// pattern operands foreach vti = AllIntegerVectors in { + // Handle VRSUB specially since it's the only integer binary op with reversed + // pattern operands // FIXME: The AddedComplexity here is covering up a missing matcher for // widening vwsub.vx which can recognize a extended folded into the // scalar of the splat. 
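[Editor's note, not part of the patch: a minimal, hypothetical illustration of the IR this change affects. The function name and the expected instruction sequences in the comments are assumptions for readability, not lines taken from the test suite; the real coverage is in the updated .ll tests below. Per the new selectVSplatSimm5Plus1NoDec predicate, the splatted subtrahend must lie in [-15, 16] so that its negation still fits in a simm5 for vadd.vi.]

; Hypothetical example (assumed names), illustrating the transform:
define <vscale x 1 x i8> @sub_splat_one(<vscale x 1 x i8> %va) {
  ; before this patch (roughly):  li a0, 1 ; vsub.vx v8, v8, a0
  ; expected after (roughly):     vsetvli a0, zero, e8, mf8, ta, ma ; vadd.vi v8, v8, -1
  %v = sub <vscale x 1 x i8> %va, splat (i8 1)
  ret <vscale x 1 x i8> %v
}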
@@ -896,6 +896,15 @@ foreach vti = AllIntegerVectors in { (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, simm5:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>; } + + let Predicates = GetVTypePredicates.Predicates in { + // Match VSUB with a small immediate to vadd.vi by negating the immediate. + def : Pat<(sub (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat_simm5_plus1_nodec simm5_plus1:$rs2))), + (!cast("PseudoVADD_VI_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, + (NegImm simm5_plus1:$rs2), vti.AVL, vti.Log2SEW, TA_MA)>; + } } // 11.2. Vector Widening Integer Add and Subtract diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 43cfc9d1e77ca..5d98ffedcbb9a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -598,6 +598,8 @@ def SplatPat_uimm5 : ComplexPattern", [], [], def SplatPat_uimm6 : ComplexPattern", [], [], 3>; def SplatPat_simm5_plus1 : ComplexPattern; +def SplatPat_simm5_plus1_nodec + : ComplexPattern; def SplatPat_simm5_plus1_nonzero : ComplexPattern; @@ -1992,10 +1994,10 @@ multiclass VPatAVGADDVL_VV_VX_RM { // 11.1. Vector Single-Width Integer Add and Subtract defm : VPatBinaryVL_VV_VX_VI; defm : VPatBinaryVL_VV_VX; -// Handle VRSUB specially since it's the only integer binary op with reversed -// pattern operands foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates.Predicates in { + // Handle VRSUB specially since it's the only integer binary op with + // reversed pattern operands def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))), (vti.Vector vti.RegClass:$rs1), vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag), @@ -2008,6 +2010,15 @@ foreach vti = AllIntegerVectors in { (!cast("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK") vti.RegClass:$passthru, vti.RegClass:$rs1, simm5:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + + // Match VSUB with a small immediate to vadd.vi by negating the immediate. 
+ def : Pat<(riscv_sub_vl (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat_simm5_plus1_nodec simm5_plus1:$rs2)), + vti.RegClass:$passthru, (vti.Mask VMV0:$vm), VLOpFrag), + (!cast("PseudoVADD_VI_"#vti.LMul.MX#"_MASK") + vti.RegClass:$passthru, vti.RegClass:$rs1, + (NegImm simm5_plus1:$rs2), (vti.Mask VMV0:$vm), + GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index 6f515996677ee..ceca813782461 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -2585,8 +2585,7 @@ define @vp_ctlz_nxv1i9( %va, @vp_ctlz_nxv1i9( %va, @llvm.vp.ctlz.nxv1i9( %va, i1 false, %m, i32 %evl) ret %v @@ -2744,13 +2742,12 @@ define @vp_ctlo_zero_nxv1i9_unpredicated_ctlz_with_vp_xor( @vp_ctlo_zero_nxv1i9_unpredicated_ctlz_with_vp_xor( @llvm.vp.xor.nxv1i9( %va, splat (i9 -1), %m, i32 %evl) %v = call @llvm.ctlz( %va.not, i1 false) diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll index 5761ae0926eae..bd7a20f9ef590 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -11,12 +11,11 @@ define @cttz_nxv1i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv1i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -77,12 +76,11 @@ declare @llvm.cttz.nxv1i8(, i1) define @cttz_nxv2i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv2i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -143,12 +141,11 @@ declare @llvm.cttz.nxv2i8(, i1) define @cttz_nxv4i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv4i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -209,12 +206,11 @@ declare @llvm.cttz.nxv4i8(, i1) define @cttz_nxv8i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv8i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; 
CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -275,12 +271,11 @@ declare @llvm.cttz.nxv8i8(, i1) define @cttz_nxv16i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv16i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -341,12 +336,11 @@ declare @llvm.cttz.nxv16i8(, i1) define @cttz_nxv32i8( %va) { ; CHECK-LABEL: cttz_nxv32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vnot.v v12, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NEXT: vadd.vi v12, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vsrl.vi v12, v8, 1 ; CHECK-NEXT: vand.vx v12, v12, a0 ; CHECK-NEXT: li a0, 51 @@ -373,12 +367,11 @@ declare @llvm.cttz.nxv32i8(, i1) define @cttz_nxv64i8( %va) { ; CHECK-LABEL: cttz_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v16, v8 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: li a0, 51 @@ -405,13 +398,12 @@ declare @llvm.cttz.nxv64i8(, i1) define @cttz_nxv1i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv1i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -472,13 +464,12 @@ declare @llvm.cttz.nxv1i16(, i1) define @cttz_nxv2i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv2i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -539,13 +530,12 @@ declare @llvm.cttz.nxv2i16(, i1) define @cttz_nxv4i16( %va) { ; 
CHECK-ZVE64X-LABEL: cttz_nxv4i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -606,13 +596,12 @@ declare @llvm.cttz.nxv4i16(, i1) define @cttz_nxv8i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv8i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -673,13 +662,12 @@ declare @llvm.cttz.nxv8i16(, i1) define @cttz_nxv16i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv16i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v12, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v12, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v12, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -740,13 +728,12 @@ declare @llvm.cttz.nxv16i16(, i1) define @cttz_nxv32i16( %va) { ; CHECK-LABEL: cttz_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v16, v8 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: lui a0, 3 @@ -779,13 +766,12 @@ declare @llvm.cttz.nxv32i16(, i1) define @cttz_nxv1i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv1i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -850,13 +836,12 @@ declare @llvm.cttz.nxv1i32(, i1) define @cttz_nxv2i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv2i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; 
CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -921,13 +906,12 @@ declare @llvm.cttz.nxv2i32(, i1) define @cttz_nxv4i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv4i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -992,13 +976,12 @@ declare @llvm.cttz.nxv4i32(, i1) define @cttz_nxv8i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv8i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v12, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v12, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v12, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -1063,13 +1046,12 @@ declare @llvm.cttz.nxv8i32(, i1) define @cttz_nxv16i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_nxv16i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v16, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v16, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v16 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v16, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -1135,18 +1117,17 @@ declare @llvm.cttz.nxv16i32(, i1) define @cttz_nxv1i64( %va) { ; RV32I-LABEL: cttz_nxv1i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32I-NEXT: vnot.v v9, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vadd.vi v9, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; RV32I-NEXT: vmv.v.x v10, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32I-NEXT: vand.vv v8, v9, v8 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v9, v8, 1 ; RV32I-NEXT: vand.vv v9, v9, v10 ; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma @@ 
-1177,17 +1158,14 @@ define @cttz_nxv1i64( %va) { ; ; RV64I-LABEL: cttz_nxv1i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m1, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -1196,6 +1174,8 @@ define @cttz_nxv1i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m1, ta, ma +; RV64I-NEXT: vadd.vi v9, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v9 ; RV64I-NEXT: vsrl.vi v9, v8, 1 @@ -1261,18 +1241,17 @@ declare @llvm.cttz.nxv1i64(, i1) define @cttz_nxv2i64( %va) { ; RV32I-LABEL: cttz_nxv2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32I-NEXT: vnot.v v10, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vadd.vi v10, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; RV32I-NEXT: vmv.v.x v12, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32I-NEXT: vand.vv v8, v10, v8 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v10, v8, 1 ; RV32I-NEXT: vand.vv v10, v10, v12 ; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma @@ -1303,17 +1282,14 @@ define @cttz_nxv2i64( %va) { ; ; RV64I-LABEL: cttz_nxv2i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m2, ta, ma -; RV64I-NEXT: vsub.vx v10, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -1322,6 +1298,8 @@ define @cttz_nxv2i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m2, ta, ma +; RV64I-NEXT: vadd.vi v10, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v10 ; RV64I-NEXT: vsrl.vi v10, v8, 1 @@ -1387,18 +1365,17 @@ declare @llvm.cttz.nxv2i64(, i1) define @cttz_nxv4i64( %va) { ; RV32I-LABEL: cttz_nxv4i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32I-NEXT: vnot.v v12, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vadd.vi v12, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, 
zero, e32, m4, ta, ma ; RV32I-NEXT: vmv.v.x v16, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32I-NEXT: vand.vv v8, v12, v8 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v12, v8, 1 ; RV32I-NEXT: vand.vv v12, v12, v16 ; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma @@ -1429,17 +1406,14 @@ define @cttz_nxv4i64( %va) { ; ; RV64I-LABEL: cttz_nxv4i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m4, ta, ma -; RV64I-NEXT: vsub.vx v12, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -1448,6 +1422,8 @@ define @cttz_nxv4i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m4, ta, ma +; RV64I-NEXT: vadd.vi v12, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v12 ; RV64I-NEXT: vsrl.vi v12, v8, 1 @@ -1513,18 +1489,17 @@ declare @llvm.cttz.nxv4i64(, i1) define @cttz_nxv8i64( %va) { ; RV32I-LABEL: cttz_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32I-NEXT: vnot.v v16, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vadd.vi v16, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; RV32I-NEXT: vmv.v.x v24, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32I-NEXT: vand.vv v8, v16, v8 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v16, v8, 1 ; RV32I-NEXT: vand.vv v24, v16, v24 ; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma @@ -1555,17 +1530,14 @@ define @cttz_nxv8i64( %va) { ; ; RV64I-LABEL: cttz_nxv8i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV64I-NEXT: vsub.vx v16, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -1574,6 +1546,8 @@ define @cttz_nxv8i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m8, ta, ma +; RV64I-NEXT: vadd.vi v16, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v16 ; RV64I-NEXT: vsrl.vi v16, v8, 1 @@ -1639,12 +1613,11 @@ declare @llvm.cttz.nxv8i64(, i1) define 
@cttz_zero_undef_nxv1i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -1700,12 +1673,11 @@ define @cttz_zero_undef_nxv1i8( %va) { define @cttz_zero_undef_nxv2i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -1761,12 +1733,11 @@ define @cttz_zero_undef_nxv2i8( %va) { define @cttz_zero_undef_nxv4i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -1822,12 +1793,11 @@ define @cttz_zero_undef_nxv4i8( %va) { define @cttz_zero_undef_nxv8i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -1883,12 +1853,11 @@ define @cttz_zero_undef_nxv8i8( %va) { define @cttz_zero_undef_nxv16i8( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i8: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e8, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: li a0, 85 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: li a0, 51 @@ -1944,12 +1913,11 @@ define @cttz_zero_undef_nxv16i8( %va) { define @cttz_zero_undef_nxv32i8( %va) { ; CHECK-LABEL: cttz_zero_undef_nxv32i8: ; CHECK: # 
%bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vnot.v v12, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NEXT: vadd.vi v12, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vsrl.vi v12, v8, 1 ; CHECK-NEXT: vand.vx v12, v12, a0 ; CHECK-NEXT: li a0, 51 @@ -1975,12 +1943,11 @@ define @cttz_zero_undef_nxv32i8( %va) { define @cttz_zero_undef_nxv64i8( %va) { ; CHECK-LABEL: cttz_zero_undef_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v16, v8 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: li a0, 51 @@ -2006,13 +1973,12 @@ define @cttz_zero_undef_nxv64i8( %va) { define @cttz_zero_undef_nxv1i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -2066,13 +2032,12 @@ define @cttz_zero_undef_nxv1i16( %va) { define @cttz_zero_undef_nxv2i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -2126,13 +2091,12 @@ define @cttz_zero_undef_nxv2i16( %va) { define @cttz_zero_undef_nxv4i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -2186,13 +2150,12 @@ define @cttz_zero_undef_nxv4i16( %va) { define @cttz_zero_undef_nxv8i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; 
CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -2246,13 +2209,12 @@ define @cttz_zero_undef_nxv8i16( %va) { define @cttz_zero_undef_nxv16i16( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i16: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v12, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v12, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 5 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v12, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 ; CHECK-ZVE64X-NEXT: lui a0, 3 @@ -2306,13 +2268,12 @@ define @cttz_zero_undef_nxv16i16( %va) { define @cttz_zero_undef_nxv32i16( %va) { ; CHECK-LABEL: cttz_zero_undef_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v16, v8 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: lui a0, 3 @@ -2344,13 +2305,12 @@ define @cttz_zero_undef_nxv32i16( %va) { define @cttz_zero_undef_nxv1i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv1i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -2408,13 +2368,12 @@ define @cttz_zero_undef_nxv1i32( %va) { define @cttz_zero_undef_nxv2i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv2i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v9, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v9, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v9 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v9, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v9, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v9, v9, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -2472,13 +2431,12 @@ define @cttz_zero_undef_nxv2i32( %va) { define @cttz_zero_undef_nxv4i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv4i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, 
zero, e32, m2, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v10, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v10, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v10 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v10, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v10, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v10, v10, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -2536,13 +2494,12 @@ define @cttz_zero_undef_nxv4i32( %va) { define @cttz_zero_undef_nxv8i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv8i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v12, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v12, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v12 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v12, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v12, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v12, v12, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -2600,13 +2557,12 @@ define @cttz_zero_undef_nxv8i32( %va) { define @cttz_zero_undef_nxv16i32( %va) { ; CHECK-ZVE64X-LABEL: cttz_zero_undef_nxv16i32: ; CHECK-ZVE64X: # %bb.0: -; CHECK-ZVE64X-NEXT: li a0, 1 -; CHECK-ZVE64X-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; CHECK-ZVE64X-NEXT: vnot.v v16, v8 -; CHECK-ZVE64X-NEXT: vsub.vx v8, v8, a0 +; CHECK-ZVE64X-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-ZVE64X-NEXT: vadd.vi v16, v8, -1 +; CHECK-ZVE64X-NEXT: vnot.v v8, v8 ; CHECK-ZVE64X-NEXT: lui a0, 349525 +; CHECK-ZVE64X-NEXT: vand.vv v8, v8, v16 ; CHECK-ZVE64X-NEXT: addi a0, a0, 1365 -; CHECK-ZVE64X-NEXT: vand.vv v8, v16, v8 ; CHECK-ZVE64X-NEXT: vsrl.vi v16, v8, 1 ; CHECK-ZVE64X-NEXT: vand.vx v16, v16, a0 ; CHECK-ZVE64X-NEXT: lui a0, 209715 @@ -2665,18 +2621,17 @@ define @cttz_zero_undef_nxv16i32( %va) { define @cttz_zero_undef_nxv1i64( %va) { ; RV32I-LABEL: cttz_zero_undef_nxv1i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32I-NEXT: vnot.v v9, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32I-NEXT: vadd.vi v9, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; RV32I-NEXT: vmv.v.x v10, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32I-NEXT: vand.vv v8, v9, v8 +; RV32I-NEXT: vand.vv v8, v8, v9 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v9, v8, 1 ; RV32I-NEXT: vand.vv v9, v9, v10 ; RV32I-NEXT: vsetvli a1, zero, e32, m1, ta, ma @@ -2707,17 +2662,14 @@ define @cttz_zero_undef_nxv1i64( %va) { ; ; RV64I-LABEL: cttz_zero_undef_nxv1i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m1, ta, ma -; RV64I-NEXT: vsub.vx v9, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 
819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -2726,6 +2678,8 @@ define @cttz_zero_undef_nxv1i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m1, ta, ma +; RV64I-NEXT: vadd.vi v9, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v9 ; RV64I-NEXT: vsrl.vi v9, v8, 1 @@ -2783,18 +2737,17 @@ define @cttz_zero_undef_nxv1i64( %va) { define @cttz_zero_undef_nxv2i64( %va) { ; RV32I-LABEL: cttz_zero_undef_nxv2i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32I-NEXT: vnot.v v10, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; RV32I-NEXT: vadd.vi v10, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; RV32I-NEXT: vmv.v.x v12, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32I-NEXT: vand.vv v8, v10, v8 +; RV32I-NEXT: vand.vv v8, v8, v10 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v10, v8, 1 ; RV32I-NEXT: vand.vv v10, v10, v12 ; RV32I-NEXT: vsetvli a1, zero, e32, m2, ta, ma @@ -2825,17 +2778,14 @@ define @cttz_zero_undef_nxv2i64( %va) { ; ; RV64I-LABEL: cttz_zero_undef_nxv2i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m2, ta, ma -; RV64I-NEXT: vsub.vx v10, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -2844,6 +2794,8 @@ define @cttz_zero_undef_nxv2i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m2, ta, ma +; RV64I-NEXT: vadd.vi v10, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v10 ; RV64I-NEXT: vsrl.vi v10, v8, 1 @@ -2901,18 +2853,17 @@ define @cttz_zero_undef_nxv2i64( %va) { define @cttz_zero_undef_nxv4i64( %va) { ; RV32I-LABEL: cttz_zero_undef_nxv4i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32I-NEXT: vnot.v v12, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; RV32I-NEXT: vadd.vi v12, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; RV32I-NEXT: vmv.v.x v16, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32I-NEXT: vand.vv v8, v12, v8 +; RV32I-NEXT: vand.vv v8, v8, v12 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v12, v8, 1 ; RV32I-NEXT: vand.vv v12, v12, v16 ; RV32I-NEXT: vsetvli a1, zero, e32, m4, ta, ma @@ -2943,17 +2894,14 @@ define @cttz_zero_undef_nxv4i64( %va) { ; ; RV64I-LABEL: cttz_zero_undef_nxv4i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; 
RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m4, ta, ma -; RV64I-NEXT: vsub.vx v12, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -2962,6 +2910,8 @@ define @cttz_zero_undef_nxv4i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m4, ta, ma +; RV64I-NEXT: vadd.vi v12, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v12 ; RV64I-NEXT: vsrl.vi v12, v8, 1 @@ -3019,18 +2969,17 @@ define @cttz_zero_undef_nxv4i64( %va) { define @cttz_zero_undef_nxv8i64( %va) { ; RV32I-LABEL: cttz_zero_undef_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a0, 1 -; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32I-NEXT: vnot.v v16, v8 -; RV32I-NEXT: vsub.vx v8, v8, a0 +; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32I-NEXT: vadd.vi v16, v8, -1 +; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 ; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; RV32I-NEXT: vmv.v.x v24, a0 ; RV32I-NEXT: lui a0, 209715 -; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32I-NEXT: vand.vv v8, v16, v8 +; RV32I-NEXT: vand.vv v8, v8, v16 +; RV32I-NEXT: addi a0, a0, 819 ; RV32I-NEXT: vsrl.vi v16, v8, 1 ; RV32I-NEXT: vand.vv v24, v16, v24 ; RV32I-NEXT: vsetvli a1, zero, e32, m8, ta, ma @@ -3061,17 +3010,14 @@ define @cttz_zero_undef_nxv8i64( %va) { ; ; RV64I-LABEL: cttz_zero_undef_nxv8i64: ; RV64I: # %bb.0: -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: lui a1, 349525 -; RV64I-NEXT: lui a2, 209715 -; RV64I-NEXT: lui a3, 61681 -; RV64I-NEXT: lui a4, 4112 -; RV64I-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV64I-NEXT: vsub.vx v16, v8, a0 -; RV64I-NEXT: addiw a0, a1, 1365 -; RV64I-NEXT: addiw a1, a2, 819 -; RV64I-NEXT: addiw a2, a3, -241 -; RV64I-NEXT: addiw a3, a4, 257 +; RV64I-NEXT: lui a0, 349525 +; RV64I-NEXT: lui a1, 209715 +; RV64I-NEXT: lui a2, 61681 +; RV64I-NEXT: lui a3, 4112 +; RV64I-NEXT: addiw a0, a0, 1365 +; RV64I-NEXT: addiw a1, a1, 819 +; RV64I-NEXT: addiw a2, a2, -241 +; RV64I-NEXT: addiw a3, a3, 257 ; RV64I-NEXT: slli a4, a0, 32 ; RV64I-NEXT: add a0, a0, a4 ; RV64I-NEXT: slli a4, a1, 32 @@ -3080,6 +3026,8 @@ define @cttz_zero_undef_nxv8i64( %va) { ; RV64I-NEXT: add a2, a2, a4 ; RV64I-NEXT: slli a4, a3, 32 ; RV64I-NEXT: add a3, a3, a4 +; RV64I-NEXT: vsetvli a4, zero, e64, m8, ta, ma +; RV64I-NEXT: vadd.vi v16, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v16 ; RV64I-NEXT: vsrl.vi v16, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll index 766717d92a749..38ef54ff4fe41 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -13,9 +13,8 @@ declare @llvm.vp.cttz.nxv1i8(, i1 immarg, @vp_cttz_nxv1i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv1i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, 
-1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -44,12 +43,11 @@ define @vp_cttz_nxv1i8( %va, @vp_cttz_nxv1i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv1i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -77,9 +75,8 @@ declare @llvm.vp.cttz.nxv2i8(, i1 immarg, @vp_cttz_nxv2i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -108,12 +105,11 @@ define @vp_cttz_nxv2i8( %va, @vp_cttz_nxv2i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv2i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -141,9 +137,8 @@ declare @llvm.vp.cttz.nxv4i8(, i1 immarg, @vp_cttz_nxv4i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -172,12 +167,11 @@ define @vp_cttz_nxv4i8( %va, @vp_cttz_nxv4i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv4i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -205,9 +199,8 @@ declare @llvm.vp.cttz.nxv8i8(, i1 immarg, @vp_cttz_nxv8i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -236,12 +229,11 @@ define @vp_cttz_nxv8i8( %va, @vp_cttz_nxv8i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv8i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -269,9 +261,8 @@ declare @llvm.vp.cttz.nxv16i8(, i1 immarg, define 
@vp_cttz_nxv16i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v10, v0.t @@ -300,12 +291,11 @@ define @vp_cttz_nxv16i8( %va, @vp_cttz_nxv16i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv16i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vnot.v v10, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v10, v8 +; CHECK-NEXT: vand.vv v8, v8, v10 ; CHECK-NEXT: vsrl.vi v10, v8, 1 ; CHECK-NEXT: vand.vx v10, v10, a0 ; CHECK-NEXT: li a0, 51 @@ -333,9 +323,8 @@ declare @llvm.vp.cttz.nxv32i8(, i1 immarg, define @vp_cttz_nxv32i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v12, v0.t @@ -364,12 +353,11 @@ define @vp_cttz_nxv32i8( %va, @vp_cttz_nxv32i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv32i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vnot.v v12, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v12, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vsrl.vi v12, v8, 1 ; CHECK-NEXT: vand.vx v12, v12, a0 ; CHECK-NEXT: li a0, 51 @@ -397,9 +385,8 @@ declare @llvm.vp.cttz.nxv64i8(, i1 immarg, define @vp_cttz_nxv64i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v16, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v16, v0.t @@ -428,12 +415,11 @@ define @vp_cttz_nxv64i8( %va, @vp_cttz_nxv64i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv64i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v16, v8 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: li a0, 51 @@ -461,9 +447,8 @@ declare @llvm.vp.cttz.nxv1i16(, i1 immarg, define @vp_cttz_nxv1i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv1i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -499,13 +484,12 @@ define @vp_cttz_nxv1i16( %va, @vp_cttz_nxv1i16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv1i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: 
vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 3 @@ -539,9 +523,8 @@ declare @llvm.vp.cttz.nxv2i16(, i1 immarg, define @vp_cttz_nxv2i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -577,13 +560,12 @@ define @vp_cttz_nxv2i16( %va, @vp_cttz_nxv2i16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv2i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 3 @@ -617,9 +599,8 @@ declare @llvm.vp.cttz.nxv4i16(, i1 immarg, define @vp_cttz_nxv4i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -655,13 +636,12 @@ define @vp_cttz_nxv4i16( %va, @vp_cttz_nxv4i16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv4i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 3 @@ -695,9 +675,8 @@ declare @llvm.vp.cttz.nxv8i16(, i1 immarg, define @vp_cttz_nxv8i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -733,13 +712,12 @@ define @vp_cttz_nxv8i16( %va, @vp_cttz_nxv8i16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv8i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vnot.v v10, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v10 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v10, v8 ; CHECK-NEXT: vsrl.vi v10, v8, 1 ; CHECK-NEXT: vand.vx v10, v10, a0 ; CHECK-NEXT: lui a0, 3 @@ -773,9 +751,8 @@ declare @llvm.vp.cttz.nxv16i16(, i1 immar define @vp_cttz_nxv16i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t ; 
CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -811,13 +788,12 @@ define @vp_cttz_nxv16i16( %va, @vp_cttz_nxv16i16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv16i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vnot.v v12, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v12, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v12, v8 ; CHECK-NEXT: vsrl.vi v12, v8, 1 ; CHECK-NEXT: vand.vx v12, v12, a0 ; CHECK-NEXT: lui a0, 3 @@ -851,9 +827,8 @@ declare @llvm.vp.cttz.nxv32i16(, i1 immar define @vp_cttz_nxv32i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v16, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -889,13 +864,12 @@ define @vp_cttz_nxv32i16( %va, @vp_cttz_nxv32i16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv32i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v16, v8 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: lui a0, 3 @@ -929,9 +903,8 @@ declare @llvm.vp.cttz.nxv1i32(, i1 immarg, define @vp_cttz_nxv1i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv1i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -968,13 +941,12 @@ define @vp_cttz_nxv1i32( %va, @vp_cttz_nxv1i32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv1i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 209715 @@ -1009,9 +981,8 @@ declare @llvm.vp.cttz.nxv2i32(, i1 immarg, define @vp_cttz_nxv2i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -1048,13 +1019,12 @@ define @vp_cttz_nxv2i32( %va, @vp_cttz_nxv2i32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv2i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; 
CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 209715 @@ -1089,9 +1059,8 @@ declare @llvm.vp.cttz.nxv4i32(, i1 immarg, define @vp_cttz_nxv4i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -1128,13 +1097,12 @@ define @vp_cttz_nxv4i32( %va, @vp_cttz_nxv4i32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv4i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vnot.v v10, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v10 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v10, v8 ; CHECK-NEXT: vsrl.vi v10, v8, 1 ; CHECK-NEXT: vand.vx v10, v10, a0 ; CHECK-NEXT: lui a0, 209715 @@ -1169,9 +1137,8 @@ declare @llvm.vp.cttz.nxv8i32(, i1 immarg, define @vp_cttz_nxv8i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -1208,13 +1175,12 @@ define @vp_cttz_nxv8i32( %va, @vp_cttz_nxv8i32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv8i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vnot.v v12, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v12, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v12, v8 ; CHECK-NEXT: vsrl.vi v12, v8, 1 ; CHECK-NEXT: vand.vx v12, v12, a0 ; CHECK-NEXT: lui a0, 209715 @@ -1249,9 +1215,8 @@ declare @llvm.vp.cttz.nxv16i32(, i1 immar define @vp_cttz_nxv16i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v16, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -1288,13 +1253,12 @@ define @vp_cttz_nxv16i32( %va, @vp_cttz_nxv16i32_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_nxv16i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v16, v8 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: lui a0, 209715 @@ -1329,9 +1293,8 @@ declare @llvm.vp.cttz.nxv1i64(, i1 immarg, define @vp_cttz_nxv1i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv1i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t +; RV32-NEXT: vadd.vi v9, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: 
vsetvli a2, zero, e32, m1, ta, ma @@ -1371,39 +1334,38 @@ define @vp_cttz_nxv1i64( %va, @vp_cttz_nxv1i64( %va, @vp_cttz_nxv1i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv1i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vnot.v v9, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v9, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v9, v8 +; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsetvli a2, zero, e32, m1, ta, ma @@ -1461,38 +1422,37 @@ define @vp_cttz_nxv1i64_unmasked( %va, i32 ; ; RV64-LABEL: vp_cttz_nxv1i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v9, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v9, v9, a0 +; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: vand.vx v9, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1511,9 +1471,8 @@ declare @llvm.vp.cttz.nxv2i64(, i1 immarg, define @vp_cttz_nxv2i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv2i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t +; RV32-NEXT: vadd.vi v10, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma @@ -1553,39 +1512,38 @@ define @vp_cttz_nxv2i64( %va, @vp_cttz_nxv2i64( %va, @vp_cttz_nxv2i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vnot.v v10, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v10, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 
349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v10, v8 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma @@ -1643,38 +1600,37 @@ define @vp_cttz_nxv2i64_unmasked( %va, i32 ; ; RV64-LABEL: vp_cttz_nxv2i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v10, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v10 ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v10, v10, a0 +; RV64-NEXT: vand.vx v10, v10, a1 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: vand.vx v10, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1693,9 +1649,8 @@ declare @llvm.vp.cttz.nxv4i64(, i1 immarg, define @vp_cttz_nxv4i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t +; RV32-NEXT: vadd.vi v12, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma @@ -1735,39 +1690,38 @@ define @vp_cttz_nxv4i64( %va, @vp_cttz_nxv4i64( %va, @vp_cttz_nxv4i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vnot.v v12, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v12, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma ; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v12, v8 +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsetvli a2, 
zero, e32, m4, ta, ma @@ -1825,38 +1778,37 @@ define @vp_cttz_nxv4i64_unmasked( %va, i32 ; ; RV64-LABEL: vp_cttz_nxv4i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v12, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v12 ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v12, v12, a0 +; RV64-NEXT: vand.vx v12, v12, a1 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: vand.vx v12, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1875,9 +1827,8 @@ declare @llvm.vp.cttz.nxv7i64(, i1 immarg, define @vp_cttz_nxv7i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv7i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -1917,39 +1868,38 @@ define @vp_cttz_nxv7i64( %va, @vp_cttz_nxv7i64( %va, @vp_cttz_nxv7i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv7i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v16, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vand.vv v24, v16, v24 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -2007,38 +1956,37 @@ define @vp_cttz_nxv7i64_unmasked( %va, i32 ; ; RV64-LABEL: vp_cttz_nxv7i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: 
addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vand.vx v16, v16, a1 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vand.vx v16, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2057,9 +2005,8 @@ declare @llvm.vp.cttz.nxv8i64(, i1 immarg, define @vp_cttz_nxv8i64( %va, %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -2099,39 +2046,38 @@ define @vp_cttz_nxv8i64( %va, @vp_cttz_nxv8i64( %va, @vp_cttz_nxv8i64_unmasked( %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v16, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vand.vv v24, v16, v24 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -2189,38 +2134,37 @@ define @vp_cttz_nxv8i64_unmasked( %va, i32 ; ; RV64-LABEL: vp_cttz_nxv8i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: 
vsub.vx v16, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vand.vx v16, v16, a1 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vand.vx v16, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -2254,138 +2198,131 @@ define @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64_unmasked( %va, i ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 1 -; RV32-NEXT: lui a3, 349525 -; RV32-NEXT: lui a4, 209715 -; RV32-NEXT: sub a5, a0, a1 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: addi a4, a4, 819 -; RV32-NEXT: vsetvli a6, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v0, a3 -; RV32-NEXT: sltu a3, a0, a5 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a3, a3, a5 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v24, v16, a2 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: sub a4, a0, a1 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: addi a3, a3, 819 +; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: sltu a2, a0, a4 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vadd.vi v24, v16, -1 ; RV32-NEXT: vnot.v v16, v16 ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsrl.vi v24, v16, 1 -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: li a6, 24 -; RV32-NEXT: mul a5, a5, a6 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v0, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vsub.vv v16, v16, v24 -; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v0, a4 -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v16, v0 -; RV32-NEXT: vsrl.vi v16, v16, 2 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: li a5, 24 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v0, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v24, v0 +; RV32-NEXT: vsub.vv v16, v16, v24 +; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v0, a3 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v16, v0 ; 
RV32-NEXT: vadd.vv v16, v24, v16 ; RV32-NEXT: vsrl.vi v24, v16, 4 ; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: lui a4, 61681 -; RV32-NEXT: lui a5, 4112 -; RV32-NEXT: addi a4, a4, -241 -; RV32-NEXT: addi a5, a5, 257 -; RV32-NEXT: vsetvli a6, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a4 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV32-NEXT: lui a3, 61681 +; RV32-NEXT: lui a4, 4112 +; RV32-NEXT: addi a3, a3, -241 +; RV32-NEXT: addi a4, a4, 257 +; RV32-NEXT: vsetvli a5, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a5 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a4 +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v16, v16, v24 -; RV32-NEXT: li a3, 56 -; RV32-NEXT: vsrl.vx v16, v16, a3 +; RV32-NEXT: li a2, 56 +; RV32-NEXT: vsrl.vx v16, v16, a2 ; RV32-NEXT: bltu a0, a1, .LBB47_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a0, a1 ; RV32-NEXT: .LBB47_2: ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v24, v8, a2 +; RV32-NEXT: vadd.vi v24, v8, -1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 1 @@ -2715,7 +2650,7 @@ define @vp_cttz_nxv16i64_unmasked( %va, i ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v24 -; RV32-NEXT: vsrl.vx v8, v8, a3 +; RV32-NEXT: vsrl.vx v8, v8, a2 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 @@ -2727,64 +2662,63 @@ define @vp_cttz_nxv16i64_unmasked( %va, i ; RV64-LABEL: vp_cttz_nxv16i64_unmasked: ; RV64: # %bb.0: ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 1 -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: sub a7, a0, a1 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: addiw a4, a4, 819 -; RV64-NEXT: addiw t0, a5, -241 -; RV64-NEXT: addiw t1, a6, 257 -; RV64-NEXT: slli a6, a3, 32 -; RV64-NEXT: add a6, a3, a6 -; RV64-NEXT: slli a5, a4, 32 -; RV64-NEXT: add a5, a4, a5 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: sub a6, a0, a1 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: addiw a7, a4, -241 +; RV64-NEXT: addiw t0, a5, 257 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a5, a2, a5 +; RV64-NEXT: slli a4, a3, 32 +; RV64-NEXT: add a4, a3, a4 +; RV64-NEXT: slli a2, a7, 32 +; RV64-NEXT: add a2, a7, a2 ; RV64-NEXT: slli a3, t0, 32 ; RV64-NEXT: add a3, t0, a3 -; RV64-NEXT: slli a4, t1, 32 -; RV64-NEXT: add a4, t1, a4 -; RV64-NEXT: sltu t0, a0, a7 -; RV64-NEXT: addi t0, t0, -1 -; RV64-NEXT: and a7, t0, a7 -; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v24, v16, a2 +; RV64-NEXT: sltu a7, a0, a6 +; RV64-NEXT: addi 
a7, a7, -1 +; RV64-NEXT: and a6, a7, a6 +; RV64-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v24, v16, -1 ; RV64-NEXT: vnot.v v16, v16 ; RV64-NEXT: vand.vv v16, v16, v24 ; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: vand.vx v24, v24, a6 +; RV64-NEXT: vand.vx v24, v24, a5 ; RV64-NEXT: vsub.vv v16, v16, v24 -; RV64-NEXT: vand.vx v24, v16, a5 +; RV64-NEXT: vand.vx v24, v16, a4 ; RV64-NEXT: vsrl.vi v16, v16, 2 -; RV64-NEXT: vand.vx v16, v16, a5 +; RV64-NEXT: vand.vx v16, v16, a4 ; RV64-NEXT: vadd.vv v16, v24, v16 ; RV64-NEXT: vsrl.vi v24, v16, 4 ; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: vand.vx v16, v16, a3 -; RV64-NEXT: vmul.vx v16, v16, a4 -; RV64-NEXT: li a7, 56 -; RV64-NEXT: vsrl.vx v16, v16, a7 +; RV64-NEXT: vand.vx v16, v16, a2 +; RV64-NEXT: vmul.vx v16, v16, a3 +; RV64-NEXT: li a6, 56 +; RV64-NEXT: vsrl.vx v16, v16, a6 ; RV64-NEXT: bltu a0, a1, .LBB47_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a0, a1 ; RV64-NEXT: .LBB47_2: ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v24, v8, a2 +; RV64-NEXT: vadd.vi v24, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v24 ; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vand.vx v24, v24, a6 +; RV64-NEXT: vand.vx v24, v24, a5 ; RV64-NEXT: vsub.vv v8, v8, v24 -; RV64-NEXT: vand.vx v24, v8, a5 +; RV64-NEXT: vand.vx v24, v8, a4 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v8, v8, a4 ; RV64-NEXT: vadd.vv v8, v24, v8 ; RV64-NEXT: vsrl.vi v24, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v24 -; RV64-NEXT: vand.vx v8, v8, a3 -; RV64-NEXT: vmul.vx v8, v8, a4 -; RV64-NEXT: vsrl.vx v8, v8, a7 +; RV64-NEXT: vand.vx v8, v8, a2 +; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vsrl.vx v8, v8, a6 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64_unmasked: @@ -3080,9 +3014,8 @@ define @vp_cttz_zero_undef_nxv16i8_unmasked( @vp_cttz_zero_undef_nxv32i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv32i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v12, v0.t @@ -3111,12 +3044,11 @@ define @vp_cttz_zero_undef_nxv32i8( %va, @vp_cttz_zero_undef_nxv32i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv32i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vnot.v v12, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v12, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v12, v8 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: vsrl.vi v12, v8, 1 ; CHECK-NEXT: vand.vx v12, v12, a0 ; CHECK-NEXT: li a0, 51 @@ -3143,9 +3075,8 @@ define @vp_cttz_zero_undef_nxv32i8_unmasked( @vp_cttz_zero_undef_nxv64i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v16, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v16, v0.t @@ -3174,12 +3105,11 @@ define @vp_cttz_zero_undef_nxv64i8( %va, @vp_cttz_zero_undef_nxv64i8_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv64i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 
-; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v16, v8 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: li a0, 51 @@ -3436,9 +3366,8 @@ define @vp_cttz_zero_undef_nxv16i16_unmasked( @vp_cttz_zero_undef_nxv32i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vsub.vx v16, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v16, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -3474,13 +3403,12 @@ define @vp_cttz_zero_undef_nxv32i16( %va, define @vp_cttz_zero_undef_nxv32i16_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv32i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vnot.v v16, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v16, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v16 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v16, v8 ; CHECK-NEXT: vsrl.vi v16, v8, 1 ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: lui a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index a3eaf37631481..96acd0aceab13 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -9,9 +9,8 @@ declare <2 x i8> @llvm.vp.cttz.v2i8(<2 x i8>, i1 immarg, <2 x i1>, i32) define <2 x i8> @vp_cttz_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -34,12 +33,11 @@ define <2 x i8> @vp_cttz_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { define <2 x i8> @vp_cttz_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v2i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -61,9 +59,8 @@ declare <4 x i8> @llvm.vp.cttz.v4i8(<4 x i8>, i1 immarg, <4 x i1>, i32) define <4 x i8> @vp_cttz_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -86,12 +83,11 @@ define <4 x i8> @vp_cttz_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { define <4 x i8> @vp_cttz_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v4i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; 
CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -113,9 +109,8 @@ declare <8 x i8> @llvm.vp.cttz.v8i8(<8 x i8>, i1 immarg, <8 x i1>, i32) define <8 x i8> @vp_cttz_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -138,12 +133,11 @@ define <8 x i8> @vp_cttz_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { define <8 x i8> @vp_cttz_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v8i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -165,9 +159,8 @@ declare <16 x i8> @llvm.vp.cttz.v16i8(<16 x i8>, i1 immarg, <16 x i1>, i32) define <16 x i8> @vp_cttz_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -190,12 +183,11 @@ define <16 x i8> @vp_cttz_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { define <16 x i8> @vp_cttz_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v16i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -217,9 +209,8 @@ declare <2 x i16> @llvm.vp.cttz.v2i16(<2 x i16>, i1 immarg, <2 x i1>, i32) define <2 x i16> @vp_cttz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -249,13 +240,12 @@ define <2 x i16> @vp_cttz_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { define <2 x i16> @vp_cttz_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v2i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 3 @@ -283,9 +273,8 @@ declare <4 x i16> @llvm.vp.cttz.v4i16(<4 x i16>, i1 
immarg, <4 x i1>, i32) define <4 x i16> @vp_cttz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -315,13 +304,12 @@ define <4 x i16> @vp_cttz_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { define <4 x i16> @vp_cttz_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v4i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 3 @@ -349,9 +337,8 @@ declare <8 x i16> @llvm.vp.cttz.v8i16(<8 x i16>, i1 immarg, <8 x i1>, i32) define <8 x i16> @vp_cttz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -381,13 +368,12 @@ define <8 x i16> @vp_cttz_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { define <8 x i16> @vp_cttz_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v8i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 3 @@ -415,9 +401,8 @@ declare <16 x i16> @llvm.vp.cttz.v16i16(<16 x i16>, i1 immarg, <16 x i1>, i32) define <16 x i16> @vp_cttz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -447,13 +432,12 @@ define <16 x i16> @vp_cttz_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl define <16 x i16> @vp_cttz_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v16i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vnot.v v10, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v10 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v10, v8 ; CHECK-NEXT: vsrl.vi v10, v8, 1 ; CHECK-NEXT: vand.vx v10, v10, a0 ; CHECK-NEXT: lui a0, 3 @@ -481,9 +465,8 @@ declare <2 x i32> @llvm.vp.cttz.v2i32(<2 x i32>, i1 immarg, <2 x i1>, i32) define <2 x i32> @vp_cttz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli 
zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -514,13 +497,12 @@ define <2 x i32> @vp_cttz_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { define <2 x i32> @vp_cttz_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v2i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 209715 @@ -549,9 +531,8 @@ declare <4 x i32> @llvm.vp.cttz.v4i32(<4 x i32>, i1 immarg, <4 x i1>, i32) define <4 x i32> @vp_cttz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -582,13 +563,12 @@ define <4 x i32> @vp_cttz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { define <4 x i32> @vp_cttz_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v4i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 209715 @@ -617,9 +597,8 @@ declare <8 x i32> @llvm.vp.cttz.v8i32(<8 x i32>, i1 immarg, <8 x i1>, i32) define <8 x i32> @vp_cttz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -650,13 +629,12 @@ define <8 x i32> @vp_cttz_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { define <8 x i32> @vp_cttz_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v8i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vnot.v v10, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v10 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v10, v8 ; CHECK-NEXT: vsrl.vi v10, v8, 1 ; CHECK-NEXT: vand.vx v10, v10, a0 ; CHECK-NEXT: lui a0, 209715 @@ -685,9 +663,8 @@ declare <16 x i32> @llvm.vp.cttz.v16i32(<16 x i32>, i1 immarg, <16 x i1>, i32) define <16 x i32> @vp_cttz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: 
vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -718,13 +695,12 @@ define <16 x i32> @vp_cttz_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl define <16 x i32> @vp_cttz_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_v16i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vnot.v v12, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v12, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v12, v8 ; CHECK-NEXT: vsrl.vi v12, v8, 1 ; CHECK-NEXT: vand.vx v12, v12, a0 ; CHECK-NEXT: lui a0, 209715 @@ -753,9 +729,8 @@ declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32) define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t +; RV32-NEXT: vadd.vi v9, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -795,39 +770,38 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; ; RV64-LABEL: vp_cttz_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v9, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v9, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vand.vx v9, v9, a0, v0.t +; RV64-NEXT: vand.vx v9, v9, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: vand.vx v9, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 false, <2 x i1> %m, i32 %evl) @@ -837,18 +811,17 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li 
a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vnot.v v9, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v9, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v9, v8 +; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -879,38 +852,37 @@ define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; ; RV64-LABEL: vp_cttz_v2i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v9, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v9, v9, a0 +; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: vand.vx v9, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -923,9 +895,8 @@ declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32) define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t +; RV32-NEXT: vadd.vi v10, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -965,39 +936,38 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; ; RV64-LABEL: vp_cttz_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, 
a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v10, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v10, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vand.vx v10, v10, a0, v0.t +; RV64-NEXT: vand.vx v10, v10, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: vand.vx v10, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 false, <4 x i1> %m, i32 %evl) @@ -1007,18 +977,17 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vnot.v v10, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v10, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v10, v8 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -1049,38 +1018,37 @@ define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; ; RV64-LABEL: vp_cttz_v4i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 
-; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v10, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v10 ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v10, v10, a0 +; RV64-NEXT: vand.vx v10, v10, a1 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: vand.vx v10, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1093,9 +1061,8 @@ declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32) define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t +; RV32-NEXT: vadd.vi v12, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma @@ -1135,39 +1102,38 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; ; RV64-LABEL: vp_cttz_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v12, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v12, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vand.vx v12, v12, a0, v0.t +; RV64-NEXT: vand.vx v12, v12, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: vand.vx v12, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 false, <8 x i1> %m, i32 %evl) @@ -1177,18 +1143,17 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { define <8 x i64> 
@vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vnot.v v12, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v12, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v12, v8 +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma @@ -1219,38 +1184,37 @@ define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; ; RV64-LABEL: vp_cttz_v8i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v12, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v12 ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v12, v12, a0 +; RV64-NEXT: vand.vx v12, v12, a1 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: vand.vx v12, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1263,9 +1227,8 @@ declare <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -1305,39 +1268,38 @@ define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; ; RV64-LABEL: vp_cttz_v15i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; 
RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl) @@ -1347,18 +1309,17 @@ define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v16, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vand.vv v24, v16, v24 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -1389,38 +1350,37 @@ define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; ; RV64-LABEL: vp_cttz_v15i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, 
a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vand.vx v16, v16, a1 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vand.vx v16, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1433,9 +1393,8 @@ declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32) define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -1475,39 +1434,38 @@ define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; ; RV64-LABEL: vp_cttz_v16i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <16 x i64> 
@llvm.vp.cttz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl) @@ -1517,18 +1475,17 @@ define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v16, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vand.vv v24, v16, v24 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -1559,38 +1516,37 @@ define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; ; RV64-LABEL: vp_cttz_v16i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vand.vx v16, v16, a1 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vand.vx v16, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -1615,152 +1571,151 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 16 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v24, v0, 2 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB34_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB34_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB34_2: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: lui a3, 349525 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; 
RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t +; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: addi a2, a2, 1365 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: 
addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 4112 -; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 4112 +; 
RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 @@ -1772,8 +1727,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1787,29 +1742,29 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1817,8 +1772,8 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 4, 
v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -1829,7 +1784,7 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 @@ -1857,75 +1812,74 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: bltu a0, a1, .LBB34_2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB34_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a4, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB34_2: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: addiw a3, a3, 819 -; RV64-NEXT: addiw a7, a5, -241 -; RV64-NEXT: addiw t0, a6, 257 -; RV64-NEXT: slli a6, a2, 32 -; RV64-NEXT: add a6, a2, a6 -; RV64-NEXT: slli a5, a3, 32 -; RV64-NEXT: add a5, a3, a5 -; RV64-NEXT: slli a2, a7, 32 -; RV64-NEXT: add a2, a7, a2 -; RV64-NEXT: slli a3, t0, 32 -; RV64-NEXT: add a3, t0, a3 -; RV64-NEXT: addi a7, a0, -16 -; RV64-NEXT: sltu a0, a0, a7 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a5, a3, -241 +; RV64-NEXT: addiw a6, a4, 257 +; RV64-NEXT: slli a4, a1, 32 +; RV64-NEXT: add a4, a1, a4 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a3, a2, a3 +; RV64-NEXT: slli a1, a5, 32 +; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: slli a2, a6, 32 +; RV64-NEXT: add a2, a6, a2 +; RV64-NEXT: addi a5, a0, -16 +; RV64-NEXT: sltu a0, a0, a5 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a7, a0, a7 +; RV64-NEXT: and a5, a0, a5 ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a6, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a5, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vmul.vx v8, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV64-NEXT: addi a6, sp, 16 +; RV64-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill 
; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 3 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 16 +; RV64-NEXT: vl8r.v v8, (a6) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a6, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a5, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vmul.vx v8, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -1952,69 +1906,78 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: li a2, 1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vadd.vi v24, v8, -1 ; RV32-NEXT: vnot.v v0, v8 -; RV32-NEXT: lui a3, 349525 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v8, a2 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vand.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vsub.vv v8, v8, v0 +; RV32-NEXT: vand.vv v24, v0, v24 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v0, v0, v8 +; RV32-NEXT: vsub.vv v24, v24, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v0, v16, a2 +; RV32-NEXT: vadd.vi v0, v16, -1 ; RV32-NEXT: vnot.v v16, v16 ; RV32-NEXT: vand.vv v0, v16, v0 -; RV32-NEXT: vsrl.vi v16, v0, 1 -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 ; RV32-NEXT: addi a2, sp, 16 ; 
RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v24, v16 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vsub.vv v24, v0, v24 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v0, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v0, v24, v16 ; RV32-NEXT: vsrl.vi v24, v24, 2 ; RV32-NEXT: vand.vv v16, v24, v16 ; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: lui a3, 4112 ; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: addi a3, a3, 257 ; RV32-NEXT: vadd.vv v16, v0, v16 ; RV32-NEXT: vsrl.vi v24, v16, 4 ; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2025,7 +1988,7 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v16, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -2040,70 +2003,69 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB35_2: -; RV64-NEXT: li a2, 1 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vnot.v v24, v8 -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: addiw a4, a4, 819 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: addiw a6, a6, 257 -; RV64-NEXT: slli a7, a3, 32 -; RV64-NEXT: add a3, a3, a7 -; RV64-NEXT: slli a7, a4, 32 -; RV64-NEXT: add a4, a4, a7 -; RV64-NEXT: slli a7, a5, 32 -; RV64-NEXT: add a5, a5, a7 -; RV64-NEXT: slli a7, a6, 32 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: addi a7, a0, -16 -; RV64-NEXT: sltu a0, a0, a7 +; RV64-NEXT: vadd.vi v24, v8, -1 +; RV64-NEXT: vnot.v v8, v8 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: 
slli a6, a2, 32 +; RV64-NEXT: add a2, a2, a6 +; RV64-NEXT: slli a6, a3, 32 +; RV64-NEXT: add a3, a3, a6 +; RV64-NEXT: slli a6, a4, 32 +; RV64-NEXT: add a4, a4, a6 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: addi a6, a0, -16 +; RV64-NEXT: sltu a0, a0, a6 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: li a7, 56 -; RV64-NEXT: vsub.vx v8, v8, a2 -; RV64-NEXT: vand.vv v8, v24, v8 +; RV64-NEXT: and a0, a0, a6 +; RV64-NEXT: li a6, 56 +; RV64-NEXT: vand.vv v8, v8, v24 ; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vsub.vv v8, v8, v24 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v24, v16, a2 +; RV64-NEXT: vadd.vi v24, v16, -1 ; RV64-NEXT: vnot.v v16, v16 ; RV64-NEXT: vand.vv v16, v16, v24 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vand.vx v24, v8, a4 +; RV64-NEXT: vand.vx v24, v8, a3 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: vand.vx v8, v8, a3 ; RV64-NEXT: vadd.vv v8, v24, v8 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vsub.vv v16, v16, v24 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v24 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vand.vx v24, v16, a4 +; RV64-NEXT: vand.vx v24, v16, a3 ; RV64-NEXT: vsrl.vi v16, v16, 2 -; RV64-NEXT: vand.vx v16, v16, a4 +; RV64-NEXT: vand.vx v16, v16, a3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v8, v8, a4 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v16, v24, v16 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmul.vx v8, v8, a6 +; RV64-NEXT: vmul.vx v8, v8, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 4 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsrl.vx v8, v8, a7 +; RV64-NEXT: vsrl.vx v8, v8, a6 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: vand.vx v16, v16, a5 -; RV64-NEXT: vmul.vx v16, v16, a6 -; RV64-NEXT: vsrl.vx v16, v16, a7 +; RV64-NEXT: vand.vx v16, v16, a4 +; RV64-NEXT: vmul.vx v16, v16, a5 +; RV64-NEXT: vsrl.vx v16, v16, a6 ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 false, <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v @@ -2112,9 +2074,8 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { define <2 x i8> @vp_cttz_zero_undef_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v2i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -2137,12 +2098,11 @@ define <2 x i8> @vp_cttz_zero_undef_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext define <2 x i8> @vp_cttz_zero_undef_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v2i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: 
vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -2162,9 +2122,8 @@ define <2 x i8> @vp_cttz_zero_undef_v2i8_unmasked(<2 x i8> %va, i32 zeroext %evl define <4 x i8> @vp_cttz_zero_undef_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v4i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -2187,12 +2146,11 @@ define <4 x i8> @vp_cttz_zero_undef_v4i8(<4 x i8> %va, <4 x i1> %m, i32 zeroext define <4 x i8> @vp_cttz_zero_undef_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v4i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -2212,9 +2170,8 @@ define <4 x i8> @vp_cttz_zero_undef_v4i8_unmasked(<4 x i8> %va, i32 zeroext %evl define <8 x i8> @vp_cttz_zero_undef_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v8i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -2237,12 +2194,11 @@ define <8 x i8> @vp_cttz_zero_undef_v8i8(<8 x i8> %va, <8 x i1> %m, i32 zeroext define <8 x i8> @vp_cttz_zero_undef_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v8i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -2262,9 +2218,8 @@ define <8 x i8> @vp_cttz_zero_undef_v8i8_unmasked(<8 x i8> %va, i32 zeroext %evl define <16 x i8> @vp_cttz_zero_undef_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: vand.vv v8, v8, v9, v0.t @@ -2287,12 +2242,11 @@ define <16 x i8> @vp_cttz_zero_undef_v16i8(<16 x i8> %va, <16 x i1> %m, i32 zero define <16 x i8> @vp_cttz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v16i8_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: li a0, 85 -; CHECK-NEXT: vand.vv v8, v9, v8 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: li a0, 51 @@ -2312,9 +2266,8 @@ define <16 x i8> 
@vp_cttz_zero_undef_v16i8_unmasked(<16 x i8> %va, i32 zeroext % define <2 x i16> @vp_cttz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v2i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -2344,13 +2297,12 @@ define <2 x i16> @vp_cttz_zero_undef_v2i16(<2 x i16> %va, <2 x i1> %m, i32 zeroe define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v2i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 3 @@ -2376,9 +2328,8 @@ define <2 x i16> @vp_cttz_zero_undef_v2i16_unmasked(<2 x i16> %va, i32 zeroext % define <4 x i16> @vp_cttz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v4i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -2408,13 +2359,12 @@ define <4 x i16> @vp_cttz_zero_undef_v4i16(<4 x i16> %va, <4 x i1> %m, i32 zeroe define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v4i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 3 @@ -2440,9 +2390,8 @@ define <4 x i16> @vp_cttz_zero_undef_v4i16_unmasked(<4 x i16> %va, i32 zeroext % define <8 x i16> @vp_cttz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -2472,13 +2421,12 @@ define <8 x i16> @vp_cttz_zero_undef_v8i16(<8 x i16> %va, <8 x i1> %m, i32 zeroe define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v8i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 3 @@ -2504,9 +2452,8 @@ define <8 x i16> @vp_cttz_zero_undef_v8i16_unmasked(<8 x i16> %va, 
i32 zeroext % define <16 x i16> @vp_cttz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t ; CHECK-NEXT: lui a0, 5 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -2536,13 +2483,12 @@ define <16 x i16> @vp_cttz_zero_undef_v16i16(<16 x i16> %va, <16 x i1> %m, i32 z define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v16i16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vnot.v v10, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 5 +; CHECK-NEXT: vand.vv v8, v8, v10 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v10, v8 ; CHECK-NEXT: vsrl.vi v10, v8, 1 ; CHECK-NEXT: vand.vx v10, v10, a0 ; CHECK-NEXT: lui a0, 3 @@ -2568,9 +2514,8 @@ define <16 x i16> @vp_cttz_zero_undef_v16i16_unmasked(<16 x i16> %va, i32 zeroex define <2 x i32> @vp_cttz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v2i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -2601,13 +2546,12 @@ define <2 x i32> @vp_cttz_zero_undef_v2i32(<2 x i32> %va, <2 x i1> %m, i32 zeroe define <2 x i32> @vp_cttz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v2i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 209715 @@ -2634,9 +2578,8 @@ define <2 x i32> @vp_cttz_zero_undef_v2i32_unmasked(<2 x i32> %va, i32 zeroext % define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vsub.vx v9, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v9, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -2667,13 +2610,12 @@ define <4 x i32> @vp_cttz_zero_undef_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroe define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v4i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vnot.v v9, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v9, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v9, v8 ; CHECK-NEXT: vsrl.vi v9, v8, 1 ; CHECK-NEXT: vand.vx v9, v9, a0 ; CHECK-NEXT: lui a0, 209715 @@ -2700,9 +2642,8 @@ define <4 x i32> @vp_cttz_zero_undef_v4i32_unmasked(<4 x i32> %va, i32 
zeroext % define <8 x i32> @vp_cttz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vsub.vx v10, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v10, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -2733,13 +2674,12 @@ define <8 x i32> @vp_cttz_zero_undef_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroe define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v8i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vnot.v v10, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v10, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v10 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v10, v8 ; CHECK-NEXT: vsrl.vi v10, v8, 1 ; CHECK-NEXT: vand.vx v10, v10, a0 ; CHECK-NEXT: lui a0, 209715 @@ -2766,9 +2706,8 @@ define <8 x i32> @vp_cttz_zero_undef_v8i32_unmasked(<8 x i32> %va, i32 zeroext % define <16 x i32> @vp_cttz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v16i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v12, v8, a1, v0.t +; CHECK-NEXT: vadd.vi v12, v8, -1, v0.t ; CHECK-NEXT: lui a0, 349525 ; CHECK-NEXT: vnot.v v8, v8, v0.t ; CHECK-NEXT: addi a0, a0, 1365 @@ -2799,13 +2738,12 @@ define <16 x i32> @vp_cttz_zero_undef_v16i32(<16 x i32> %va, <16 x i1> %m, i32 z define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_v16i32_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vnot.v v12, v8 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v12, v8, -1 +; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: lui a0, 349525 +; CHECK-NEXT: vand.vv v8, v8, v12 ; CHECK-NEXT: addi a0, a0, 1365 -; CHECK-NEXT: vand.vv v8, v12, v8 ; CHECK-NEXT: vsrl.vi v12, v8, 1 ; CHECK-NEXT: vand.vx v12, v12, a0 ; CHECK-NEXT: lui a0, 209715 @@ -2832,9 +2770,8 @@ define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroex define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsub.vx v9, v8, a1, v0.t +; RV32-NEXT: vadd.vi v9, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -2874,39 +2811,38 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe ; ; RV64-LABEL: vp_cttz_zero_undef_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; 
RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v9, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v9, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t -; RV64-NEXT: vand.vx v9, v9, a0, v0.t +; RV64-NEXT: vand.vx v9, v9, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v9, v8, a1, v0.t +; RV64-NEXT: vand.vx v9, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v9, v8, v0.t ; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v9, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64> %va, i1 true, <2 x i1> %m, i32 %evl) @@ -2916,18 +2852,17 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v2i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vnot.v v9, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v9, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v8, v9, v8 +; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v9, v8, 1 ; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -2958,38 +2893,37 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext % ; ; RV64-LABEL: vp_cttz_zero_undef_v2i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV64-NEXT: vsub.vx v9, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v9, v8, -1 ; RV64-NEXT: vnot.v 
v8, v8 ; RV64-NEXT: vand.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 1 -; RV64-NEXT: vand.vx v9, v9, a0 +; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsub.vv v8, v8, v9 -; RV64-NEXT: vand.vx v9, v8, a1 +; RV64-NEXT: vand.vx v9, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v9, v8 ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v9 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3000,9 +2934,8 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext % define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v4i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsub.vx v10, v8, a1, v0.t +; RV32-NEXT: vadd.vi v10, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -3042,39 +2975,38 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe ; ; RV64-LABEL: vp_cttz_zero_undef_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v10, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v10, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 1, v0.t -; RV64-NEXT: vand.vx v10, v10, a0, v0.t +; RV64-NEXT: vand.vx v10, v10, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v10, v8, a1, v0.t +; RV64-NEXT: vand.vx v10, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v10, v8, v0.t ; RV64-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v10, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64> %va, i1 true, <4 x i1> %m, i32 %evl) @@ -3084,18 +3016,17 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; 
RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vnot.v v10, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v10, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.v.x v12, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v8, v10, v8 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v10, v8, 1 ; RV32-NEXT: vand.vv v10, v10, v12 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -3126,38 +3057,37 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext % ; ; RV64-LABEL: vp_cttz_zero_undef_v4i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV64-NEXT: vsub.vx v10, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v10, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v10 ; RV64-NEXT: vsrl.vi v10, v8, 1 -; RV64-NEXT: vand.vx v10, v10, a0 +; RV64-NEXT: vand.vx v10, v10, a1 ; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vand.vx v10, v8, a1 +; RV64-NEXT: vand.vx v10, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v10, v8 ; RV64-NEXT: vsrl.vi v10, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v10 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3168,9 +3098,8 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext % define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsub.vx v12, v8, a1, v0.t +; RV32-NEXT: vadd.vi v12, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma @@ -3210,39 +3139,38 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe ; ; RV64-LABEL: vp_cttz_zero_undef_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: 
addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v12, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v12, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV64-NEXT: vand.vx v12, v12, a0, v0.t +; RV64-NEXT: vand.vx v12, v12, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v12, v8, a1, v0.t +; RV64-NEXT: vand.vx v12, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v12, v8, v0.t ; RV64-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v12, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64> %va, i1 true, <8 x i1> %m, i32 %evl) @@ -3252,18 +3180,17 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vnot.v v12, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v12, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v8, v12, v8 +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v12, v8, 1 ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma @@ -3294,38 +3221,37 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext % ; ; RV64-LABEL: vp_cttz_zero_undef_v8i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV64-NEXT: vsub.vx v12, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; 
RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v12, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v12 ; RV64-NEXT: vsrl.vi v12, v8, 1 -; RV64-NEXT: vand.vx v12, v12, a0 +; RV64-NEXT: vand.vx v12, v12, a1 ; RV64-NEXT: vsub.vv v8, v8, v12 -; RV64-NEXT: vand.vx v12, v8, a1 +; RV64-NEXT: vand.vx v12, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v12, v8 ; RV64-NEXT: vsrl.vi v12, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v12 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3336,9 +3262,8 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext % define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -3378,39 +3303,38 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; ; RV64-LABEL: vp_cttz_zero_undef_v15i64: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 
true, <15 x i1> %m, i32 %evl) @@ -3420,18 +3344,17 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v16, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vand.vv v24, v16, v24 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -3462,38 +3385,37 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex ; ; RV64-LABEL: vp_cttz_zero_undef_v15i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vand.vx v16, v16, a1 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vand.vx v16, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3504,9 +3426,8 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -3546,39 +3467,38 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z ; ; RV64-LABEL: vp_cttz_zero_undef_v16i64: ; RV64: # %bb.0: -; 
RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 +; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vand.vx v16, v16, a1, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a1, v0.t +; RV64-NEXT: vand.vx v16, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vand.vx v8, v8, a2, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t ; RV64-NEXT: ret %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl) @@ -3588,18 +3508,17 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v16i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8 -; RV32-NEXT: vsub.vx v8, v8, a1 +; RV32-NEXT: vadd.vi v16, v8, -1 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: lui a1, 209715 -; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vand.vv v24, v16, v24 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma @@ -3630,38 +3549,37 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex ; ; RV64-LABEL: vp_cttz_zero_undef_v16i64_unmasked: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a4, 61681 -; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a3, a3, -241 +; RV64-NEXT: addiw a4, a4, 257 +; RV64-NEXT: slli a5, a1, 32 +; RV64-NEXT: add a1, a1, a5 +; RV64-NEXT: slli a5, a2, 32 +; RV64-NEXT: add a2, a2, a5 
+; RV64-NEXT: slli a5, a3, 32 +; RV64-NEXT: add a3, a3, a5 +; RV64-NEXT: slli a5, a4, 32 +; RV64-NEXT: add a4, a4, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1 -; RV64-NEXT: addiw a0, a2, 1365 -; RV64-NEXT: addiw a1, a3, 819 -; RV64-NEXT: addiw a2, a4, -241 -; RV64-NEXT: addiw a3, a5, 257 -; RV64-NEXT: slli a4, a0, 32 -; RV64-NEXT: add a0, a0, a4 -; RV64-NEXT: slli a4, a1, 32 -; RV64-NEXT: add a1, a1, a4 -; RV64-NEXT: slli a4, a2, 32 -; RV64-NEXT: add a2, a2, a4 -; RV64-NEXT: slli a4, a3, 32 -; RV64-NEXT: add a3, a3, a4 +; RV64-NEXT: vadd.vi v16, v8, -1 ; RV64-NEXT: vnot.v v8, v8 ; RV64-NEXT: vand.vv v8, v8, v16 ; RV64-NEXT: vsrl.vi v16, v8, 1 -; RV64-NEXT: vand.vx v16, v16, a0 +; RV64-NEXT: vand.vx v16, v16, a1 ; RV64-NEXT: vsub.vv v8, v8, v16 -; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vand.vx v16, v8, a2 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vand.vx v8, v8, a2 ; RV64-NEXT: vadd.vv v8, v16, v8 ; RV64-NEXT: vsrl.vi v16, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vand.vx v8, v8, a2 -; RV64-NEXT: vmul.vx v8, v8, a3 +; RV64-NEXT: vand.vx v8, v8, a3 +; RV64-NEXT: vmul.vx v8, v8, a4 ; RV64-NEXT: li a0, 56 ; RV64-NEXT: vsrl.vx v8, v8, a0 ; RV64-NEXT: ret @@ -3684,152 +3602,151 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: li a1, 16 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v24, v0, 2 -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a1, .LBB70_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB70_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB70_2: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: lui a3, 349525 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t +; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: addi a3, a3, 1365 +; RV32-NEXT: addi a2, a2, 1365 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # 
Unknown-size Folded Reload +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 209715 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; 
RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 61681 -; RV32-NEXT: addi a3, a3, -241 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 61681 +; RV32-NEXT: addi a2, a2, -241 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: lui a3, 4112 -; RV32-NEXT: addi a3, a3, 257 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 +; RV32-NEXT: and a0, a0, a2 ; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vadd.vi v16, v8, -1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, 
v16, v0.t ; RV32-NEXT: addi a0, sp, 16 @@ -3841,8 +3758,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -3856,29 +3773,29 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -3886,8 +3803,8 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -3898,7 +3815,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 @@ -3926,75 +3843,74 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a1, 16 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a4, a0 -; RV64-NEXT: bltu a0, a1, .LBB70_2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB70_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a4, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB70_2: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: lui a3, 209715 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: addiw a3, a3, 819 -; RV64-NEXT: addiw a7, a5, -241 -; RV64-NEXT: addiw t0, a6, 257 -; RV64-NEXT: slli a6, a2, 32 -; RV64-NEXT: add a6, a2, a6 -; RV64-NEXT: slli a5, a3, 32 -; RV64-NEXT: 
add a5, a3, a5 -; RV64-NEXT: slli a2, a7, 32 -; RV64-NEXT: add a2, a7, a2 -; RV64-NEXT: slli a3, t0, 32 -; RV64-NEXT: add a3, t0, a3 -; RV64-NEXT: addi a7, a0, -16 -; RV64-NEXT: sltu a0, a0, a7 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: lui a2, 209715 +; RV64-NEXT: lui a3, 61681 +; RV64-NEXT: lui a4, 4112 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: addiw a2, a2, 819 +; RV64-NEXT: addiw a5, a3, -241 +; RV64-NEXT: addiw a6, a4, 257 +; RV64-NEXT: slli a4, a1, 32 +; RV64-NEXT: add a4, a1, a4 +; RV64-NEXT: slli a3, a2, 32 +; RV64-NEXT: add a3, a2, a3 +; RV64-NEXT: slli a1, a5, 32 +; RV64-NEXT: add a1, a5, a1 +; RV64-NEXT: slli a2, a6, 32 +; RV64-NEXT: add a2, a6, a2 +; RV64-NEXT: addi a5, a0, -16 +; RV64-NEXT: sltu a0, a0, a5 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a7, a0, a7 +; RV64-NEXT: and a5, a0, a5 ; RV64-NEXT: li a0, 56 -; RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a6, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a5, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vmul.vx v8, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV64-NEXT: addi a4, sp, 16 -; RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV64-NEXT: addi a6, sp, 16 +; RV64-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 3 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t +; RV64-NEXT: csrr a6, vlenb +; RV64-NEXT: slli a6, a6, 3 +; RV64-NEXT: add a6, sp, a6 +; RV64-NEXT: addi a6, a6, 16 +; RV64-NEXT: vl8r.v v8, (a6) # Unknown-size Folded Reload +; RV64-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; RV64-NEXT: vadd.vi v16, v8, -1, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t ; RV64-NEXT: vand.vv v8, v8, v16, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a6, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t ; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a5, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a5, v0.t +; RV64-NEXT: vand.vx v8, v8, a3, v0.t ; RV64-NEXT: vadd.vv v8, v16, v8, v0.t ; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vmul.vx v8, v8, a3, v0.t +; RV64-NEXT: vand.vx v8, v8, a1, v0.t +; RV64-NEXT: vmul.vx v8, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v16, v8, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -4021,69 +3937,78 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: 
slli a2, a2, 4 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: li a2, 1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vadd.vi v24, v8, -1 ; RV32-NEXT: vnot.v v0, v8 -; RV32-NEXT: lui a3, 349525 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 -; RV32-NEXT: addi a3, a0, -16 -; RV32-NEXT: sltu a0, a0, a3 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v8, a2 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, a0, -16 +; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: lui a3, 209715 +; RV32-NEXT: and a0, a0, a2 +; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v8, a2 -; RV32-NEXT: addi a3, a3, 819 -; RV32-NEXT: vand.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v0, v8, 1 -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vsub.vv v8, v8, v0 +; RV32-NEXT: vand.vv v24, v0, v24 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsrl.vi v0, v24, 1 +; RV32-NEXT: vand.vv v0, v0, v8 +; RV32-NEXT: vsub.vv v24, v24, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v0, v16, a2 +; RV32-NEXT: vadd.vi v0, v16, -1 ; RV32-NEXT: vnot.v v16, v16 ; RV32-NEXT: vand.vv v0, v16, v0 -; RV32-NEXT: vsrl.vi v16, v0, 1 -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vadd.vv v8, v24, v8 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v24, v16 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v0, 1 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vsub.vv v24, v0, v24 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v0, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v0, v24, v16 ; RV32-NEXT: vsrl.vi v24, v24, 2 ; RV32-NEXT: vand.vv v16, v24, v16 ; RV32-NEXT: lui a2, 61681 -; RV32-NEXT: lui a3, 4112 ; RV32-NEXT: addi a2, a2, -241 -; RV32-NEXT: addi a3, a3, 257 ; RV32-NEXT: vadd.vv v16, v0, v16 ; RV32-NEXT: vsrl.vi v24, v16, 4 ; RV32-NEXT: vadd.vv v16, v16, v24 -; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: lui a2, 4112 +; RV32-NEXT: addi a2, a2, 257 
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a3 +; RV32-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -4094,7 +4019,7 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v16, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 ; RV32-NEXT: addi sp, sp, 16 @@ -4109,70 +4034,69 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB71_2: -; RV64-NEXT: li a2, 1 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vnot.v v24, v8 -; RV64-NEXT: lui a3, 349525 -; RV64-NEXT: lui a4, 209715 -; RV64-NEXT: lui a5, 61681 -; RV64-NEXT: lui a6, 4112 -; RV64-NEXT: addiw a3, a3, 1365 -; RV64-NEXT: addiw a4, a4, 819 -; RV64-NEXT: addiw a5, a5, -241 -; RV64-NEXT: addiw a6, a6, 257 -; RV64-NEXT: slli a7, a3, 32 -; RV64-NEXT: add a3, a3, a7 -; RV64-NEXT: slli a7, a4, 32 -; RV64-NEXT: add a4, a4, a7 -; RV64-NEXT: slli a7, a5, 32 -; RV64-NEXT: add a5, a5, a7 -; RV64-NEXT: slli a7, a6, 32 -; RV64-NEXT: add a6, a6, a7 -; RV64-NEXT: addi a7, a0, -16 -; RV64-NEXT: sltu a0, a0, a7 +; RV64-NEXT: vadd.vi v24, v8, -1 +; RV64-NEXT: vnot.v v8, v8 +; RV64-NEXT: lui a2, 349525 +; RV64-NEXT: lui a3, 209715 +; RV64-NEXT: lui a4, 61681 +; RV64-NEXT: lui a5, 4112 +; RV64-NEXT: addiw a2, a2, 1365 +; RV64-NEXT: addiw a3, a3, 819 +; RV64-NEXT: addiw a4, a4, -241 +; RV64-NEXT: addiw a5, a5, 257 +; RV64-NEXT: slli a6, a2, 32 +; RV64-NEXT: add a2, a2, a6 +; RV64-NEXT: slli a6, a3, 32 +; RV64-NEXT: add a3, a3, a6 +; RV64-NEXT: slli a6, a4, 32 +; RV64-NEXT: add a4, a4, a6 +; RV64-NEXT: slli a6, a5, 32 +; RV64-NEXT: add a5, a5, a6 +; RV64-NEXT: addi a6, a0, -16 +; RV64-NEXT: sltu a0, a0, a6 ; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: and a0, a0, a7 -; RV64-NEXT: li a7, 56 -; RV64-NEXT: vsub.vx v8, v8, a2 -; RV64-NEXT: vand.vv v8, v24, v8 +; RV64-NEXT: and a0, a0, a6 +; RV64-NEXT: li a6, 56 +; RV64-NEXT: vand.vv v8, v8, v24 ; RV64-NEXT: vsrl.vi v24, v8, 1 -; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vsub.vv v8, v8, v24 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v24, v16, a2 +; RV64-NEXT: vadd.vi v24, v16, -1 ; RV64-NEXT: vnot.v v16, v16 ; RV64-NEXT: vand.vv v16, v16, v24 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vand.vx v24, v8, a4 +; RV64-NEXT: vand.vx v24, v8, a3 ; RV64-NEXT: vsrl.vi v8, v8, 2 -; RV64-NEXT: vand.vx v8, v8, a4 +; RV64-NEXT: vand.vx v8, v8, a3 ; RV64-NEXT: vadd.vv v8, v24, v8 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 1 -; RV64-NEXT: vand.vx v24, v24, a3 +; RV64-NEXT: vand.vx v24, v24, a2 ; RV64-NEXT: vsub.vv v16, v16, v24 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v8, 4 ; RV64-NEXT: vadd.vv v8, v8, v24 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vand.vx v24, v16, a4 +; RV64-NEXT: vand.vx v24, v16, a3 ; RV64-NEXT: vsrl.vi v16, v16, 2 -; RV64-NEXT: vand.vx v16, v16, a4 +; RV64-NEXT: vand.vx v16, v16, a3 ; RV64-NEXT: vsetvli 
zero, a1, e64, m8, ta, ma -; RV64-NEXT: vand.vx v8, v8, a5 +; RV64-NEXT: vand.vx v8, v8, a4 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v16, v24, v16 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmul.vx v8, v8, a6 +; RV64-NEXT: vmul.vx v8, v8, a5 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v16, 4 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsrl.vx v8, v8, a7 +; RV64-NEXT: vsrl.vx v8, v8, a6 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vadd.vv v16, v16, v24 -; RV64-NEXT: vand.vx v16, v16, a5 -; RV64-NEXT: vmul.vx v16, v16, a6 -; RV64-NEXT: vsrl.vx v16, v16, a7 +; RV64-NEXT: vand.vx v16, v16, a4 +; RV64-NEXT: vmul.vx v16, v16, a5 +; RV64-NEXT: vsrl.vx v16, v16, a6 ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 true, <32 x i1> splat (i1 true), i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index 57e0eeb92ee2f..1724b92a9ab48 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -13,9 +13,8 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RVI-NEXT: vle8.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: li a1, 85 +; RVI-NEXT: vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -90,10 +89,9 @@ define void @cttz_v8i16(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RVI-NEXT: vle16.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: lui a1, 5 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -168,10 +166,9 @@ define void @cttz_v4i32(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RVI-NEXT: vle32.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: lui a1, 349525 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -254,11 +251,10 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind { ; RV32I-NEXT: addi a1, a1, 1365 ; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32I-NEXT: vmv.v.x v9, a1 -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32I-NEXT: vsub.vx v10, v8, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vadd.vi v10, v8, -1 ; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: vand.vv v8, v8, v10 ; RV32I-NEXT: vsrl.vi v10, v8, 1 @@ -310,8 +306,7 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind { ; RV64I-NEXT: add a3, a3, a5 ; RV64I-NEXT: slli a5, a4, 32 ; RV64I-NEXT: add a4, a4, a5 -; RV64I-NEXT: li a5, 1 -; RV64I-NEXT: vsub.vx v9, v8, a5 +; RV64I-NEXT: vadd.vi v9, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v9 ; RV64I-NEXT: vsrl.vi v9, v8, 1 @@ -392,9 +387,8 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { ; RVI-NEXT: li a1, 32 ; RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; RVI-NEXT: vle8.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: li a1, 85 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ 
-472,10 +466,9 @@ define void @cttz_v16i16(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RVI-NEXT: vle16.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: lui a1, 5 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ -550,10 +543,9 @@ define void @cttz_v8i32(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RVI-NEXT: vle32.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: lui a1, 349525 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ -636,11 +628,10 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind { ; RV32I-NEXT: addi a1, a1, 1365 ; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32I-NEXT: vmv.v.x v10, a1 -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32I-NEXT: vsub.vx v12, v8, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vadd.vi v12, v8, -1 ; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: vand.vv v8, v8, v12 ; RV32I-NEXT: vsrl.vi v12, v8, 1 @@ -692,8 +683,7 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind { ; RV64I-NEXT: add a3, a3, a5 ; RV64I-NEXT: slli a5, a4, 32 ; RV64I-NEXT: add a4, a4, a5 -; RV64I-NEXT: li a5, 1 -; RV64I-NEXT: vsub.vx v10, v8, a5 +; RV64I-NEXT: vadd.vi v10, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v10 ; RV64I-NEXT: vsrl.vi v10, v8, 1 @@ -773,9 +763,8 @@ define void @cttz_zero_undef_v16i8(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RVI-NEXT: vle8.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: li a1, 85 +; RVI-NEXT: vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -845,10 +834,9 @@ define void @cttz_zero_undef_v8i16(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RVI-NEXT: vle16.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: lui a1, 5 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -916,10 +904,9 @@ define void @cttz_zero_undef_v4i32(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RVI-NEXT: vle32.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v9, v8, a1 ; RVI-NEXT: lui a1, 349525 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v9, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v9 ; RVI-NEXT: vsrl.vi v9, v8, 1 @@ -995,11 +982,10 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { ; RV32I-NEXT: addi a1, a1, 1365 ; RV32I-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32I-NEXT: vmv.v.x v9, a1 -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32I-NEXT: vsub.vx v10, v8, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32I-NEXT: vadd.vi v10, v8, -1 ; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: vand.vv v8, v8, v10 ; RV32I-NEXT: vsrl.vi v10, v8, 1 @@ -1051,8 +1037,7 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind { ; RV64I-NEXT: add a3, a3, a5 ; RV64I-NEXT: slli a5, a4, 32 ; RV64I-NEXT: add a4, a4, a5 -; 
RV64I-NEXT: li a5, 1 -; RV64I-NEXT: vsub.vx v9, v8, a5 +; RV64I-NEXT: vadd.vi v9, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v9 ; RV64I-NEXT: vsrl.vi v9, v8, 1 @@ -1123,9 +1108,8 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind { ; RVI-NEXT: li a1, 32 ; RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; RVI-NEXT: vle8.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: li a1, 85 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ -1198,10 +1182,9 @@ define void @cttz_zero_undef_v16i16(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RVI-NEXT: vle16.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: lui a1, 5 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ -1269,10 +1252,9 @@ define void @cttz_zero_undef_v8i32(ptr %x, ptr %y) nounwind { ; RVI: # %bb.0: ; RVI-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RVI-NEXT: vle32.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 ; RVI-NEXT: lui a1, 349525 ; RVI-NEXT: addi a1, a1, 1365 +; RVI-NEXT: vadd.vi v10, v8, -1 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 ; RVI-NEXT: vsrl.vi v10, v8, 1 @@ -1348,11 +1330,10 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ; RV32I-NEXT: addi a1, a1, 1365 ; RV32I-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32I-NEXT: vmv.v.x v10, a1 -; RV32I-NEXT: li a1, 1 -; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32I-NEXT: vsub.vx v12, v8, a1 ; RV32I-NEXT: lui a1, 209715 ; RV32I-NEXT: addi a1, a1, 819 +; RV32I-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32I-NEXT: vadd.vi v12, v8, -1 ; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: vand.vv v8, v8, v12 ; RV32I-NEXT: vsrl.vi v12, v8, 1 @@ -1404,8 +1385,7 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind { ; RV64I-NEXT: add a3, a3, a5 ; RV64I-NEXT: slli a5, a4, 32 ; RV64I-NEXT: add a4, a4, a5 -; RV64I-NEXT: li a5, 1 -; RV64I-NEXT: vsub.vx v10, v8, a5 +; RV64I-NEXT: vadd.vi v10, v8, -1 ; RV64I-NEXT: vnot.v v8, v8 ; RV64I-NEXT: vand.vv v8, v8, v10 ; RV64I-NEXT: vsrl.vi v10, v8, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 39fd70beb9ee2..0436a27409f81 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -4031,8 +4031,7 @@ define void @sub_vi_v16i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: li a1, -1 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v8, v8, 1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, ptr %x @@ -4046,8 +4045,7 @@ define void @sub_vi_v8i16(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: li a1, -1 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v8, v8, 1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x @@ -4061,8 +4059,7 @@ define void @sub_vi_v4i32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: li a1, -1 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v8, v8, 1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, ptr %x @@ -4076,8 +4073,7 @@ define void 
@sub_vi_v2i64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: li a1, -1 -; CHECK-NEXT: vsub.vx v8, v8, a1 +; CHECK-NEXT: vadd.vi v8, v8, 1 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll index 3a8d08f306a51..f3ad54afa21c2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll @@ -39,9 +39,8 @@ define @i1_zext_add_commuted( %a, @i1_zext_sub( %a, %b) { ; CHECK-LABEL: i1_zext_sub: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu -; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %zext = zext %a to %sub = sub %b, %zext diff --git a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll index 0bd82e654e021..d34b401529202 100644 --- a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll @@ -75,9 +75,8 @@ define @test_urem_vec_odd_divisor_eq0( %x) define @test_urem_vec_even_divisor_eq1( %x) nounwind { ; RV32-LABEL: test_urem_vec_even_divisor_eq1: ; RV32: # %bb.0: -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v8, v8, a0 +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vadd.vi v8, v8, -1 ; RV32-NEXT: lui a0, 1048571 ; RV32-NEXT: addi a0, a0, -1365 ; RV32-NEXT: vmul.vx v8, v8, a0 @@ -93,9 +92,8 @@ define @test_urem_vec_even_divisor_eq1( %x) ; ; RV64-LABEL: test_urem_vec_even_divisor_eq1: ; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64-NEXT: vsub.vx v8, v8, a0 +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vadd.vi v8, v8, -1 ; RV64-NEXT: lui a0, 1048571 ; RV64-NEXT: addi a0, a0, -1365 ; RV64-NEXT: vmul.vx v8, v8, a0 @@ -117,9 +115,8 @@ define @test_urem_vec_even_divisor_eq1( %x) define @test_urem_vec_odd_divisor_eq1( %x) nounwind { ; RV32-LABEL: test_urem_vec_odd_divisor_eq1: ; RV32: # %bb.0: -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV32-NEXT: vsub.vx v8, v8, a0 +; RV32-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV32-NEXT: vadd.vi v8, v8, -1 ; RV32-NEXT: lui a0, 1048573 ; RV32-NEXT: addi a0, a0, -819 ; RV32-NEXT: vmul.vx v8, v8, a0 @@ -132,9 +129,8 @@ define @test_urem_vec_odd_divisor_eq1( %x) ; ; RV64-LABEL: test_urem_vec_odd_divisor_eq1: ; RV64: # %bb.0: -; RV64-NEXT: li a0, 1 -; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; RV64-NEXT: vsub.vx v8, v8, a0 +; RV64-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; RV64-NEXT: vadd.vi v8, v8, -1 ; RV64-NEXT: lui a0, 1048573 ; RV64-NEXT: addi a0, a0, -819 ; RV64-NEXT: vmul.vx v8, v8, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll index 58b6f0253b99a..d2ef711fc0d74 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll @@ -156,7 +156,6 @@ define @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ; NO_FOLDING-NEXT: vlm.v v9, (a1) ; NO_FOLDING-NEXT: vlm.v v10, (a2) ; NO_FOLDING-NEXT: vmv.v.i v11, 0 -; NO_FOLDING-NEXT: li a0, 1 ; NO_FOLDING-NEXT: vmv.v.v v0, v8 ; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 ; 
NO_FOLDING-NEXT: vmv.v.v v0, v9 @@ -166,7 +165,7 @@ define @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ; NO_FOLDING-NEXT: vmul.vv v9, v12, v9 ; NO_FOLDING-NEXT: vsub.vv v11, v12, v10 ; NO_FOLDING-NEXT: vmv.v.v v0, v8 -; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; NO_FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t ; NO_FOLDING-NEXT: vor.vv v8, v9, v10 ; NO_FOLDING-NEXT: vor.vv v8, v8, v11 ; NO_FOLDING-NEXT: ret @@ -178,7 +177,6 @@ define @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ; FOLDING-NEXT: vlm.v v9, (a1) ; FOLDING-NEXT: vlm.v v10, (a2) ; FOLDING-NEXT: vmv.v.i v11, 0 -; FOLDING-NEXT: li a0, 1 ; FOLDING-NEXT: vmv.v.v v0, v8 ; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 ; FOLDING-NEXT: vmv.v.v v0, v9 @@ -188,7 +186,7 @@ define @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ; FOLDING-NEXT: vmul.vv v9, v12, v9 ; FOLDING-NEXT: vsub.vv v11, v12, v10 ; FOLDING-NEXT: vmv.v.v v0, v8 -; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t ; FOLDING-NEXT: vor.vv v8, v9, v10 ; FOLDING-NEXT: vor.vv v8, v8, v11 ; FOLDING-NEXT: ret @@ -214,7 +212,6 @@ define @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; NO_FOLDING-NEXT: vlm.v v9, (a1) ; NO_FOLDING-NEXT: vlm.v v10, (a2) ; NO_FOLDING-NEXT: vmv.v.i v11, 0 -; NO_FOLDING-NEXT: li a0, 1 ; NO_FOLDING-NEXT: vmv1r.v v0, v8 ; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 ; NO_FOLDING-NEXT: vmv1r.v v0, v9 @@ -224,7 +221,7 @@ define @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; NO_FOLDING-NEXT: vmul.vv v9, v12, v9 ; NO_FOLDING-NEXT: vsub.vv v11, v12, v10 ; NO_FOLDING-NEXT: vmv1r.v v0, v8 -; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; NO_FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t ; NO_FOLDING-NEXT: vor.vv v8, v9, v10 ; NO_FOLDING-NEXT: vor.vv v8, v8, v11 ; NO_FOLDING-NEXT: ret @@ -236,7 +233,6 @@ define @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; FOLDING-NEXT: vlm.v v9, (a1) ; FOLDING-NEXT: vlm.v v10, (a2) ; FOLDING-NEXT: vmv.v.i v11, 0 -; FOLDING-NEXT: li a0, 1 ; FOLDING-NEXT: vmv1r.v v0, v8 ; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 ; FOLDING-NEXT: vmv1r.v v0, v9 @@ -246,7 +242,7 @@ define @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; FOLDING-NEXT: vmul.vv v9, v12, v9 ; FOLDING-NEXT: vsub.vv v11, v12, v10 ; FOLDING-NEXT: vmv1r.v v0, v8 -; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; FOLDING-NEXT: vadd.vi v10, v10, -1, v0.t ; FOLDING-NEXT: vor.vv v8, v9, v10 ; FOLDING-NEXT: vor.vv v8, v8, v11 ; FOLDING-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll index b43de0de49514..7442be92fffcd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll @@ -853,9 +853,8 @@ define @vsub_vx_mask_nxv8i32( %va, i32 sign define @vsub_vi_nxv8i32_one( %va) { ; CHECK-LABEL: vsub_vi_nxv8i32_one: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -1 ; CHECK-NEXT: ret %vc = sub %va, splat (i32 1) ret %vc @@ -864,9 +863,8 @@ define @vsub_vi_nxv8i32_one( %va) { define @vsub_vi_nxv8i32_minusone( %va) { ; CHECK-LABEL: vsub_vi_nxv8i32_minusone: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, 1 ; CHECK-NEXT: ret %vc = sub %va, splat (i32 -1) ret %vc @@ -875,9 +873,8 @@ 
define @vsub_vi_nxv8i32_minusone( %va) { define @vsub_vi_nxv8i32_15( %va) { ; CHECK-LABEL: vsub_vi_nxv8i32_15: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 15 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -15 ; CHECK-NEXT: ret %vc = sub %va, splat (i32 15) ret %vc @@ -886,9 +883,8 @@ define @vsub_vi_nxv8i32_15( %va) { define @vsub_vi_nxv8i32_16( %va) { ; CHECK-LABEL: vsub_vi_nxv8i32_16: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 16 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, -16 ; CHECK-NEXT: ret %vc = sub %va, splat (i32 16) ret %vc @@ -897,9 +893,8 @@ define @vsub_vi_nxv8i32_16( %va) { define @vsub_vi_nxv8i32_minus15( %va) { ; CHECK-LABEL: vsub_vi_nxv8i32_minus15: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -15 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vsub.vx v8, v8, a0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vadd.vi v8, v8, 15 ; CHECK-NEXT: ret %vc = sub %va, splat (i32 -15) ret %vc @@ -919,9 +914,8 @@ define @vsub_vi_nxv8i32_minus16( %va) { define @vsub_vi_mask_nxv8i32( %va, %mask) { ; CHECK-LABEL: vsub_vi_mask_nxv8i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 7 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, mu -; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, -7, v0.t ; CHECK-NEXT: ret %vs = select %mask, splat (i32 7), zeroinitializer %vc = sub %va, %vs