diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index c5432619a3646..c3922e38729dc 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1026,13 +1026,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { unsigned ShAmt = N1C->getZExtValue(); uint64_t Mask = N0.getConstantOperandVal(1); - // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has - // 32 leading zeros and C3 trailing zeros. if (ShAmt <= 32 && isShiftedMask_64(Mask)) { unsigned XLen = Subtarget->getXLen(); unsigned LeadingZeros = XLen - llvm::bit_width(Mask); unsigned TrailingZeros = llvm::countr_zero(Mask); if (TrailingZeros > 0 && LeadingZeros == 32) { + // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) + // where C2 has 32 leading zeros and C3 trailing zeros. SDNode *SRLIW = CurDAG->getMachineNode( RISCV::SRLIW, DL, VT, N0->getOperand(0), CurDAG->getTargetConstant(TrailingZeros, DL, VT)); @@ -1042,6 +1042,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { ReplaceNode(Node, SLLI); return; } + if (TrailingZeros == 0 && LeadingZeros > ShAmt && + XLen - LeadingZeros > 11 && LeadingZeros != 32) { + // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C) + // where C2 has C4 leading zeros and no trailing zeros. + // This is profitable if the "and" would otherwise be lowered to + // (srli (slli X, C4), C4) rather than (andi X, C2). + // The "LeadingZeros == 32" case is excluded because: + // - with Zba it's just (slli.uw X, C) + // - without Zba a tablegen pattern applies the very same + // transform as we would have done here + SDNode *SLLI = CurDAG->getMachineNode( + RISCV::SLLI, DL, VT, N0->getOperand(0), + CurDAG->getTargetConstant(LeadingZeros, DL, VT)); + SDNode *SRLI = CurDAG->getMachineNode( + RISCV::SRLI, DL, VT, SDValue(SLLI, 0), + CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT)); + ReplaceNode(Node, SRLI); + return; + } } break; } diff --git a/llvm/test/CodeGen/RISCV/and-shl.ll b/llvm/test/CodeGen/RISCV/and-shl.ll new file mode 100644 index 0000000000000..c3cb5d8e2e37d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/and-shl.ll @@ -0,0 +1,79 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I + +define i32 @and_0xfff_shl_2(i32 %x) { +; RV32I-LABEL: and_0xfff_shl_2: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 20 +; RV32I-NEXT: srli a0, a0, 18 +; RV32I-NEXT: ret +; +; RV64I-LABEL: and_0xfff_shl_2: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 52 +; RV64I-NEXT: srli a0, a0, 50 +; RV64I-NEXT: ret + %a = and i32 %x, 4095 + %s = shl i32 %a, 2 + ret i32 %s +} + +define i32 @and_0x7ff_shl_2(i32 %x) { +; RV32I-LABEL: and_0x7ff_shl_2: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a0, a0, 2047 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: and_0x7ff_shl_2: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a0, a0, 2047 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: ret + %a = and i32 %x, 2047 + %s = shl i32 %a, 2 + ret i32 %s +} + +define i64 @and_0xffffffff_shl_2(i64 %x) { +; RV32I-LABEL: and_0xffffffff_shl_2: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: srli a1, a0, 30 +; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: and_0xffffffff_shl_2: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 30 +; RV64I-NEXT: ret + %a = and 
i64 %x, 4294967295 + %s = shl i64 %a, 2 + ret i64 %s +} + +define i32 @and_0xfff_shl_2_multi_use(i32 %x) { +; RV32I-LABEL: and_0xfff_shl_2_multi_use: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a0, a0, 20 +; RV32I-NEXT: srli a0, a0, 20 +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: and_0xfff_shl_2_multi_use: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a0, a0, 52 +; RV64I-NEXT: srli a0, a0, 52 +; RV64I-NEXT: slli a1, a0, 2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret + %a = and i32 %x, 4095 + %s = shl i32 %a, 2 + %r = add i32 %a, %s + ret i32 %r +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll index 7f4483a8f77d9..ddcb3c3121bc3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll @@ -124,42 +124,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFH32: # %bb.0: ; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFH32-NEXT: lui a1, 8 ; ZVFH32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFH32-NEXT: vmv.x.s a2, v9 -; ZVFH32-NEXT: addi a1, a1, -1 +; ZVFH32-NEXT: vmv.x.s a1, v9 ; ZVFH32-NEXT: vslidedown.vi v9, v9, 1 -; ZVFH32-NEXT: vmv.x.s a3, v8 -; ZVFH32-NEXT: and a2, a2, a1 -; ZVFH32-NEXT: vmv.x.s a4, v9 -; ZVFH32-NEXT: and a1, a4, a1 -; ZVFH32-NEXT: slli a4, a3, 17 -; ZVFH32-NEXT: slli a3, a3, 30 -; ZVFH32-NEXT: srli a4, a4, 19 -; ZVFH32-NEXT: slli a1, a1, 15 -; ZVFH32-NEXT: or a2, a2, a3 -; ZVFH32-NEXT: or a1, a2, a1 +; ZVFH32-NEXT: vmv.x.s a2, v8 +; ZVFH32-NEXT: slli a1, a1, 17 +; ZVFH32-NEXT: srli a1, a1, 17 +; ZVFH32-NEXT: slli a3, a2, 30 +; ZVFH32-NEXT: or a1, a1, a3 +; ZVFH32-NEXT: vmv.x.s a3, v9 +; ZVFH32-NEXT: slli a2, a2, 17 +; ZVFH32-NEXT: slli a3, a3, 17 +; ZVFH32-NEXT: srli a2, a2, 19 +; ZVFH32-NEXT: srli a3, a3, 2 +; ZVFH32-NEXT: or a1, a1, a3 ; ZVFH32-NEXT: sw a1, 0(a0) -; ZVFH32-NEXT: sh a4, 4(a0) +; ZVFH32-NEXT: sh a2, 4(a0) ; ZVFH32-NEXT: ret ; ; ZVFH64-LABEL: fp2si_v3f32_v3i15: ; ZVFH64: # %bb.0: ; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFH64-NEXT: lui a1, 8 -; ZVFH64-NEXT: vmv.x.s a2, v9 -; ZVFH64-NEXT: addiw a1, a1, -1 +; ZVFH64-NEXT: vmv.x.s a1, v9 ; ZVFH64-NEXT: vslidedown.vi v8, v9, 1 ; ZVFH64-NEXT: vslidedown.vi v9, v9, 2 -; ZVFH64-NEXT: and a2, a2, a1 -; ZVFH64-NEXT: vmv.x.s a3, v8 -; ZVFH64-NEXT: and a1, a3, a1 +; ZVFH64-NEXT: slli a1, a1, 49 +; ZVFH64-NEXT: vmv.x.s a2, v8 ; ZVFH64-NEXT: vmv.x.s a3, v9 +; ZVFH64-NEXT: srli a1, a1, 49 +; ZVFH64-NEXT: slli a2, a2, 49 ; ZVFH64-NEXT: slli a3, a3, 30 -; ZVFH64-NEXT: slli a1, a1, 15 -; ZVFH64-NEXT: or a2, a2, a3 -; ZVFH64-NEXT: or a1, a2, a1 +; ZVFH64-NEXT: srli a2, a2, 34 +; ZVFH64-NEXT: or a1, a1, a3 +; ZVFH64-NEXT: or a1, a1, a2 ; ZVFH64-NEXT: slli a2, a1, 19 ; ZVFH64-NEXT: srli a2, a2, 51 ; ZVFH64-NEXT: sw a1, 0(a0) @@ -170,42 +168,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN32: # %bb.0: ; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFHMIN32-NEXT: lui a1, 8 ; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMIN32-NEXT: vmv.x.s a2, v9 -; ZVFHMIN32-NEXT: addi a1, a1, -1 +; ZVFHMIN32-NEXT: vmv.x.s a1, v9 ; ZVFHMIN32-NEXT: vslidedown.vi v9, v9, 1 -; ZVFHMIN32-NEXT: vmv.x.s a3, v8 -; ZVFHMIN32-NEXT: and a2, a2, a1 -; ZVFHMIN32-NEXT: vmv.x.s a4, v9 -; ZVFHMIN32-NEXT: and a1, a4, a1 -; ZVFHMIN32-NEXT: slli a4, a3, 17 -; ZVFHMIN32-NEXT: slli a3, a3, 30 -; ZVFHMIN32-NEXT: srli a4, 
a4, 19 -; ZVFHMIN32-NEXT: slli a1, a1, 15 -; ZVFHMIN32-NEXT: or a2, a2, a3 -; ZVFHMIN32-NEXT: or a1, a2, a1 +; ZVFHMIN32-NEXT: vmv.x.s a2, v8 +; ZVFHMIN32-NEXT: slli a1, a1, 17 +; ZVFHMIN32-NEXT: srli a1, a1, 17 +; ZVFHMIN32-NEXT: slli a3, a2, 30 +; ZVFHMIN32-NEXT: or a1, a1, a3 +; ZVFHMIN32-NEXT: vmv.x.s a3, v9 +; ZVFHMIN32-NEXT: slli a2, a2, 17 +; ZVFHMIN32-NEXT: slli a3, a3, 17 +; ZVFHMIN32-NEXT: srli a2, a2, 19 +; ZVFHMIN32-NEXT: srli a3, a3, 2 +; ZVFHMIN32-NEXT: or a1, a1, a3 ; ZVFHMIN32-NEXT: sw a1, 0(a0) -; ZVFHMIN32-NEXT: sh a4, 4(a0) +; ZVFHMIN32-NEXT: sh a2, 4(a0) ; ZVFHMIN32-NEXT: ret ; ; ZVFHMIN64-LABEL: fp2si_v3f32_v3i15: ; ZVFHMIN64: # %bb.0: ; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFHMIN64-NEXT: lui a1, 8 -; ZVFHMIN64-NEXT: vmv.x.s a2, v9 -; ZVFHMIN64-NEXT: addiw a1, a1, -1 +; ZVFHMIN64-NEXT: vmv.x.s a1, v9 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1 ; ZVFHMIN64-NEXT: vslidedown.vi v9, v9, 2 -; ZVFHMIN64-NEXT: and a2, a2, a1 -; ZVFHMIN64-NEXT: vmv.x.s a3, v8 -; ZVFHMIN64-NEXT: and a1, a3, a1 +; ZVFHMIN64-NEXT: slli a1, a1, 49 +; ZVFHMIN64-NEXT: vmv.x.s a2, v8 ; ZVFHMIN64-NEXT: vmv.x.s a3, v9 +; ZVFHMIN64-NEXT: srli a1, a1, 49 +; ZVFHMIN64-NEXT: slli a2, a2, 49 ; ZVFHMIN64-NEXT: slli a3, a3, 30 -; ZVFHMIN64-NEXT: slli a1, a1, 15 -; ZVFHMIN64-NEXT: or a2, a2, a3 -; ZVFHMIN64-NEXT: or a1, a2, a1 +; ZVFHMIN64-NEXT: srli a2, a2, 34 +; ZVFHMIN64-NEXT: or a1, a1, a3 +; ZVFHMIN64-NEXT: or a1, a1, a2 ; ZVFHMIN64-NEXT: slli a2, a1, 19 ; ZVFHMIN64-NEXT: srli a2, a2, 51 ; ZVFHMIN64-NEXT: sw a1, 0(a0) @@ -221,42 +217,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFH32: # %bb.0: ; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFH32-NEXT: lui a1, 16 ; ZVFH32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFH32-NEXT: vmv.x.s a2, v9 -; ZVFH32-NEXT: addi a1, a1, -1 +; ZVFH32-NEXT: vmv.x.s a1, v9 ; ZVFH32-NEXT: vslidedown.vi v9, v9, 1 -; ZVFH32-NEXT: vmv.x.s a3, v8 -; ZVFH32-NEXT: and a2, a2, a1 -; ZVFH32-NEXT: vmv.x.s a4, v9 -; ZVFH32-NEXT: and a1, a4, a1 -; ZVFH32-NEXT: slli a4, a3, 17 -; ZVFH32-NEXT: slli a3, a3, 30 -; ZVFH32-NEXT: srli a4, a4, 19 -; ZVFH32-NEXT: slli a1, a1, 15 -; ZVFH32-NEXT: or a2, a2, a3 -; ZVFH32-NEXT: or a1, a2, a1 +; ZVFH32-NEXT: vmv.x.s a2, v8 +; ZVFH32-NEXT: slli a1, a1, 16 +; ZVFH32-NEXT: srli a1, a1, 16 +; ZVFH32-NEXT: slli a3, a2, 30 +; ZVFH32-NEXT: or a1, a1, a3 +; ZVFH32-NEXT: vmv.x.s a3, v9 +; ZVFH32-NEXT: slli a2, a2, 17 +; ZVFH32-NEXT: slli a3, a3, 16 +; ZVFH32-NEXT: srli a2, a2, 19 +; ZVFH32-NEXT: srli a3, a3, 1 +; ZVFH32-NEXT: or a1, a1, a3 ; ZVFH32-NEXT: sw a1, 0(a0) -; ZVFH32-NEXT: sh a4, 4(a0) +; ZVFH32-NEXT: sh a2, 4(a0) ; ZVFH32-NEXT: ret ; ; ZVFH64-LABEL: fp2ui_v3f32_v3i15: ; ZVFH64: # %bb.0: ; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFH64-NEXT: lui a1, 16 -; ZVFH64-NEXT: vmv.x.s a2, v9 -; ZVFH64-NEXT: addiw a1, a1, -1 +; ZVFH64-NEXT: vmv.x.s a1, v9 ; ZVFH64-NEXT: vslidedown.vi v8, v9, 1 ; ZVFH64-NEXT: vslidedown.vi v9, v9, 2 -; ZVFH64-NEXT: and a2, a2, a1 -; ZVFH64-NEXT: vmv.x.s a3, v8 -; ZVFH64-NEXT: and a1, a3, a1 +; ZVFH64-NEXT: slli a1, a1, 48 +; ZVFH64-NEXT: vmv.x.s a2, v8 ; ZVFH64-NEXT: vmv.x.s a3, v9 +; ZVFH64-NEXT: srli a1, a1, 48 +; ZVFH64-NEXT: slli a2, a2, 48 ; ZVFH64-NEXT: slli a3, a3, 30 -; ZVFH64-NEXT: slli a1, a1, 15 -; ZVFH64-NEXT: or a2, a2, a3 -; ZVFH64-NEXT: or a1, a2, a1 +; ZVFH64-NEXT: srli a2, a2, 33 +; ZVFH64-NEXT: or a1, a1, a3 +; ZVFH64-NEXT: or a1, a1, a2 ; 
ZVFH64-NEXT: slli a2, a1, 19 ; ZVFH64-NEXT: srli a2, a2, 51 ; ZVFH64-NEXT: sw a1, 0(a0) @@ -267,42 +261,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) { ; ZVFHMIN32: # %bb.0: ; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFHMIN32-NEXT: lui a1, 16 ; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2 -; ZVFHMIN32-NEXT: vmv.x.s a2, v9 -; ZVFHMIN32-NEXT: addi a1, a1, -1 +; ZVFHMIN32-NEXT: vmv.x.s a1, v9 ; ZVFHMIN32-NEXT: vslidedown.vi v9, v9, 1 -; ZVFHMIN32-NEXT: vmv.x.s a3, v8 -; ZVFHMIN32-NEXT: and a2, a2, a1 -; ZVFHMIN32-NEXT: vmv.x.s a4, v9 -; ZVFHMIN32-NEXT: and a1, a4, a1 -; ZVFHMIN32-NEXT: slli a4, a3, 17 -; ZVFHMIN32-NEXT: slli a3, a3, 30 -; ZVFHMIN32-NEXT: srli a4, a4, 19 -; ZVFHMIN32-NEXT: slli a1, a1, 15 -; ZVFHMIN32-NEXT: or a2, a2, a3 -; ZVFHMIN32-NEXT: or a1, a2, a1 +; ZVFHMIN32-NEXT: vmv.x.s a2, v8 +; ZVFHMIN32-NEXT: slli a1, a1, 16 +; ZVFHMIN32-NEXT: srli a1, a1, 16 +; ZVFHMIN32-NEXT: slli a3, a2, 30 +; ZVFHMIN32-NEXT: or a1, a1, a3 +; ZVFHMIN32-NEXT: vmv.x.s a3, v9 +; ZVFHMIN32-NEXT: slli a2, a2, 17 +; ZVFHMIN32-NEXT: slli a3, a3, 16 +; ZVFHMIN32-NEXT: srli a2, a2, 19 +; ZVFHMIN32-NEXT: srli a3, a3, 1 +; ZVFHMIN32-NEXT: or a1, a1, a3 ; ZVFHMIN32-NEXT: sw a1, 0(a0) -; ZVFHMIN32-NEXT: sh a4, 4(a0) +; ZVFHMIN32-NEXT: sh a2, 4(a0) ; ZVFHMIN32-NEXT: ret ; ; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15: ; ZVFHMIN64: # %bb.0: ; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8 -; ZVFHMIN64-NEXT: lui a1, 16 -; ZVFHMIN64-NEXT: vmv.x.s a2, v9 -; ZVFHMIN64-NEXT: addiw a1, a1, -1 +; ZVFHMIN64-NEXT: vmv.x.s a1, v9 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1 ; ZVFHMIN64-NEXT: vslidedown.vi v9, v9, 2 -; ZVFHMIN64-NEXT: and a2, a2, a1 -; ZVFHMIN64-NEXT: vmv.x.s a3, v8 -; ZVFHMIN64-NEXT: and a1, a3, a1 +; ZVFHMIN64-NEXT: slli a1, a1, 48 +; ZVFHMIN64-NEXT: vmv.x.s a2, v8 ; ZVFHMIN64-NEXT: vmv.x.s a3, v9 +; ZVFHMIN64-NEXT: srli a1, a1, 48 +; ZVFHMIN64-NEXT: slli a2, a2, 48 ; ZVFHMIN64-NEXT: slli a3, a3, 30 -; ZVFHMIN64-NEXT: slli a1, a1, 15 -; ZVFHMIN64-NEXT: or a2, a2, a3 -; ZVFHMIN64-NEXT: or a1, a2, a1 +; ZVFHMIN64-NEXT: srli a2, a2, 33 +; ZVFHMIN64-NEXT: or a1, a1, a3 +; ZVFHMIN64-NEXT: or a1, a1, a2 ; ZVFHMIN64-NEXT: slli a2, a1, 19 ; ZVFHMIN64-NEXT: srli a2, a2, 51 ; ZVFHMIN64-NEXT: sw a1, 0(a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index e9fd0a19e3eb6..139f7b4e6a0c8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -3296,11 +3296,11 @@ define <4 x i16> @buildvec_v4i16_pack(i16 %e1, i16 %e2, i16 %e3, i16 %e4) { ; RVA22U64-LABEL: buildvec_v4i16_pack: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: slli a3, a3, 48 -; RVA22U64-NEXT: zext.h a2, a2 +; RVA22U64-NEXT: slli a2, a2, 48 ; RVA22U64-NEXT: zext.h a0, a0 -; RVA22U64-NEXT: zext.h a1, a1 -; RVA22U64-NEXT: slli a2, a2, 32 -; RVA22U64-NEXT: slli a1, a1, 16 +; RVA22U64-NEXT: slli a1, a1, 48 +; RVA22U64-NEXT: srli a2, a2, 16 +; RVA22U64-NEXT: srli a1, a1, 32 ; RVA22U64-NEXT: or a2, a2, a3 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: or a0, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 141d54cf585f2..c6e12c52122d2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -3205,88 +3205,86 @@ define <8 x i32> 
@mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lui a1, 16 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a2, v0 -; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: addiw a1, a1, -1 -; RV64ZVE32F-NEXT: beqz a3, .LBB40_2 +; RV64ZVE32F-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-NEXT: andi a2, a1, 1 +; RV64ZVE32F-NEXT: beqz a2, .LBB40_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: lw a3, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a3 +; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB40_2: # %else -; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB40_4 +; RV64ZVE32F-NEXT: andi a2, a1, 2 +; RV64ZVE32F-NEXT: beqz a2, .LBB40_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: lw a3, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v9, a3 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV64ZVE32F-NEXT: .LBB40_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB40_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB40_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 -; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB40_15 +; RV64ZVE32F-NEXT: andi a2, a1, 8 +; RV64ZVE32F-NEXT: bnez a2, .LBB40_15 ; RV64ZVE32F-NEXT: .LBB40_6: # %else8 -; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB40_16 +; RV64ZVE32F-NEXT: andi a2, a1, 16 +; RV64ZVE32F-NEXT: bnez a2, .LBB40_16 ; RV64ZVE32F-NEXT: .LBB40_7: # %else11 -; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB40_9 +; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: beqz a2, .LBB40_9 ; RV64ZVE32F-NEXT: .LBB40_8: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: lw a3, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; 
RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB40_9: # %else14 -; RV64ZVE32F-NEXT: andi a3, a2, 64 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB40_11 +; RV64ZVE32F-NEXT: beqz a2, .LBB40_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: lw a3, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: .LBB40_11: # %else17 -; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB40_13 +; RV64ZVE32F-NEXT: andi a1, a1, -128 +; RV64ZVE32F-NEXT: beqz a1, .LBB40_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: and a1, a2, a1 -; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 48 +; RV64ZVE32F-NEXT: srli a1, a1, 46 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -3298,44 +3296,44 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB40_14: # %cond.load4 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: lw a3, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 -; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB40_6 +; RV64ZVE32F-NEXT: andi a2, a1, 8 +; RV64ZVE32F-NEXT: beqz a2, .LBB40_6 ; RV64ZVE32F-NEXT: .LBB40_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: lw a3, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a3 +; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 -; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB40_7 +; RV64ZVE32F-NEXT: andi a2, a1, 16 +; RV64ZVE32F-NEXT: beqz a2, .LBB40_7 ; RV64ZVE32F-NEXT: .LBB40_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: and a3, 
a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: lw a3, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB40_8 +; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: bnez a2, .LBB40_8 ; RV64ZVE32F-NEXT: j .LBB40_9 %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs @@ -5643,124 +5641,122 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lui a5, 16 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a6, v0 -; RV64ZVE32F-NEXT: andi a3, a6, 1 -; RV64ZVE32F-NEXT: addiw a5, a5, -1 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi a3, a5, 1 ; RV64ZVE32F-NEXT: beqz a3, .LBB53_3 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a5 -; RV64ZVE32F-NEXT: slli a3, a3, 3 +; RV64ZVE32F-NEXT: slli a3, a3, 48 +; RV64ZVE32F-NEXT: srli a3, a3, 45 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: ld a3, 0(a3) -; RV64ZVE32F-NEXT: andi a4, a6, 2 +; RV64ZVE32F-NEXT: andi a4, a5, 2 ; RV64ZVE32F-NEXT: bnez a4, .LBB53_4 ; RV64ZVE32F-NEXT: .LBB53_2: ; RV64ZVE32F-NEXT: ld a4, 8(a2) ; RV64ZVE32F-NEXT: j .LBB53_5 ; RV64ZVE32F-NEXT: .LBB53_3: ; RV64ZVE32F-NEXT: ld a3, 0(a2) -; RV64ZVE32F-NEXT: andi a4, a6, 2 +; RV64ZVE32F-NEXT: andi a4, a5, 2 ; RV64ZVE32F-NEXT: beqz a4, .LBB53_2 ; RV64ZVE32F-NEXT: .LBB53_4: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a4, v9 -; RV64ZVE32F-NEXT: and a4, a4, a5 -; RV64ZVE32F-NEXT: slli a4, a4, 3 +; RV64ZVE32F-NEXT: slli a4, a4, 48 +; RV64ZVE32F-NEXT: srli a4, a4, 45 ; RV64ZVE32F-NEXT: add a4, a1, a4 ; RV64ZVE32F-NEXT: ld a4, 0(a4) ; RV64ZVE32F-NEXT: .LBB53_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a7, a6, 4 +; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: beqz a7, .LBB53_10 +; RV64ZVE32F-NEXT: beqz a6, .LBB53_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 -; RV64ZVE32F-NEXT: vmv.x.s a7, v8 -; RV64ZVE32F-NEXT: and a7, a7, a5 -; RV64ZVE32F-NEXT: slli a7, a7, 3 -; RV64ZVE32F-NEXT: add a7, a1, a7 -; RV64ZVE32F-NEXT: ld a7, 0(a7) -; RV64ZVE32F-NEXT: andi t0, a6, 8 -; RV64ZVE32F-NEXT: bnez t0, .LBB53_11 +; RV64ZVE32F-NEXT: vmv.x.s a6, v8 +; RV64ZVE32F-NEXT: slli a6, a6, 48 +; RV64ZVE32F-NEXT: srli a6, a6, 45 +; RV64ZVE32F-NEXT: add a6, a1, a6 +; RV64ZVE32F-NEXT: ld a6, 0(a6) +; RV64ZVE32F-NEXT: andi a7, a5, 8 +; RV64ZVE32F-NEXT: bnez a7, .LBB53_11 ; RV64ZVE32F-NEXT: .LBB53_7: -; RV64ZVE32F-NEXT: ld t0, 24(a2) -; RV64ZVE32F-NEXT: andi t1, a6, 16 -; RV64ZVE32F-NEXT: bnez t1, .LBB53_12 +; RV64ZVE32F-NEXT: ld a7, 24(a2) +; RV64ZVE32F-NEXT: andi t0, a5, 16 +; RV64ZVE32F-NEXT: bnez t0, .LBB53_12 ; RV64ZVE32F-NEXT: .LBB53_8: 
-; RV64ZVE32F-NEXT: ld t1, 32(a2) -; RV64ZVE32F-NEXT: andi t2, a6, 32 -; RV64ZVE32F-NEXT: bnez t2, .LBB53_13 +; RV64ZVE32F-NEXT: ld t0, 32(a2) +; RV64ZVE32F-NEXT: andi t1, a5, 32 +; RV64ZVE32F-NEXT: bnez t1, .LBB53_13 ; RV64ZVE32F-NEXT: .LBB53_9: -; RV64ZVE32F-NEXT: ld t2, 40(a2) +; RV64ZVE32F-NEXT: ld t1, 40(a2) ; RV64ZVE32F-NEXT: j .LBB53_14 ; RV64ZVE32F-NEXT: .LBB53_10: -; RV64ZVE32F-NEXT: ld a7, 16(a2) -; RV64ZVE32F-NEXT: andi t0, a6, 8 -; RV64ZVE32F-NEXT: beqz t0, .LBB53_7 +; RV64ZVE32F-NEXT: ld a6, 16(a2) +; RV64ZVE32F-NEXT: andi a7, a5, 8 +; RV64ZVE32F-NEXT: beqz a7, .LBB53_7 ; RV64ZVE32F-NEXT: .LBB53_11: # %cond.load7 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s t0, v8 -; RV64ZVE32F-NEXT: and t0, t0, a5 -; RV64ZVE32F-NEXT: slli t0, t0, 3 +; RV64ZVE32F-NEXT: vmv.x.s a7, v8 +; RV64ZVE32F-NEXT: slli a7, a7, 48 +; RV64ZVE32F-NEXT: srli a7, a7, 45 +; RV64ZVE32F-NEXT: add a7, a1, a7 +; RV64ZVE32F-NEXT: ld a7, 0(a7) +; RV64ZVE32F-NEXT: andi t0, a5, 16 +; RV64ZVE32F-NEXT: beqz t0, .LBB53_8 +; RV64ZVE32F-NEXT: .LBB53_12: # %cond.load10 +; RV64ZVE32F-NEXT: vmv.x.s t0, v9 +; RV64ZVE32F-NEXT: slli t0, t0, 48 +; RV64ZVE32F-NEXT: srli t0, t0, 45 ; RV64ZVE32F-NEXT: add t0, a1, t0 ; RV64ZVE32F-NEXT: ld t0, 0(t0) -; RV64ZVE32F-NEXT: andi t1, a6, 16 -; RV64ZVE32F-NEXT: beqz t1, .LBB53_8 -; RV64ZVE32F-NEXT: .LBB53_12: # %cond.load10 -; RV64ZVE32F-NEXT: vmv.x.s t1, v9 -; RV64ZVE32F-NEXT: and t1, t1, a5 -; RV64ZVE32F-NEXT: slli t1, t1, 3 -; RV64ZVE32F-NEXT: add t1, a1, t1 -; RV64ZVE32F-NEXT: ld t1, 0(t1) -; RV64ZVE32F-NEXT: andi t2, a6, 32 -; RV64ZVE32F-NEXT: beqz t2, .LBB53_9 +; RV64ZVE32F-NEXT: andi t1, a5, 32 +; RV64ZVE32F-NEXT: beqz t1, .LBB53_9 ; RV64ZVE32F-NEXT: .LBB53_13: # %cond.load13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 -; RV64ZVE32F-NEXT: vmv.x.s t2, v8 -; RV64ZVE32F-NEXT: and t2, t2, a5 -; RV64ZVE32F-NEXT: slli t2, t2, 3 -; RV64ZVE32F-NEXT: add t2, a1, t2 -; RV64ZVE32F-NEXT: ld t2, 0(t2) +; RV64ZVE32F-NEXT: vmv.x.s t1, v8 +; RV64ZVE32F-NEXT: slli t1, t1, 48 +; RV64ZVE32F-NEXT: srli t1, t1, 45 +; RV64ZVE32F-NEXT: add t1, a1, t1 +; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB53_14: # %else14 -; RV64ZVE32F-NEXT: andi t3, a6, 64 +; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: beqz t3, .LBB53_17 +; RV64ZVE32F-NEXT: beqz t2, .LBB53_17 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s t3, v8 -; RV64ZVE32F-NEXT: and t3, t3, a5 -; RV64ZVE32F-NEXT: slli t3, t3, 3 -; RV64ZVE32F-NEXT: add t3, a1, t3 -; RV64ZVE32F-NEXT: ld t3, 0(t3) -; RV64ZVE32F-NEXT: andi a6, a6, -128 -; RV64ZVE32F-NEXT: bnez a6, .LBB53_18 +; RV64ZVE32F-NEXT: vmv.x.s t2, v8 +; RV64ZVE32F-NEXT: slli t2, t2, 48 +; RV64ZVE32F-NEXT: srli t2, t2, 45 +; RV64ZVE32F-NEXT: add t2, a1, t2 +; RV64ZVE32F-NEXT: ld t2, 0(t2) +; RV64ZVE32F-NEXT: andi a5, a5, -128 +; RV64ZVE32F-NEXT: bnez a5, .LBB53_18 ; RV64ZVE32F-NEXT: .LBB53_16: ; RV64ZVE32F-NEXT: ld a1, 56(a2) ; RV64ZVE32F-NEXT: j .LBB53_19 ; RV64ZVE32F-NEXT: .LBB53_17: -; RV64ZVE32F-NEXT: ld t3, 48(a2) -; RV64ZVE32F-NEXT: andi a6, a6, -128 -; RV64ZVE32F-NEXT: beqz a6, .LBB53_16 +; RV64ZVE32F-NEXT: ld t2, 48(a2) +; RV64ZVE32F-NEXT: andi a5, a5, -128 +; RV64ZVE32F-NEXT: beqz a5, .LBB53_16 ; RV64ZVE32F-NEXT: .LBB53_18: # %cond.load19 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: and a2, a2, a5 -; RV64ZVE32F-NEXT: slli a2, a2, 3 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 45 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; 
RV64ZVE32F-NEXT: ld a1, 0(a1) ; RV64ZVE32F-NEXT: .LBB53_19: # %else20 ; RV64ZVE32F-NEXT: sd a3, 0(a0) ; RV64ZVE32F-NEXT: sd a4, 8(a0) -; RV64ZVE32F-NEXT: sd a7, 16(a0) -; RV64ZVE32F-NEXT: sd t0, 24(a0) -; RV64ZVE32F-NEXT: sd t1, 32(a0) -; RV64ZVE32F-NEXT: sd t2, 40(a0) -; RV64ZVE32F-NEXT: sd t3, 48(a0) +; RV64ZVE32F-NEXT: sd a6, 16(a0) +; RV64ZVE32F-NEXT: sd a7, 24(a0) +; RV64ZVE32F-NEXT: sd t0, 32(a0) +; RV64ZVE32F-NEXT: sd t1, 40(a0) +; RV64ZVE32F-NEXT: sd t2, 48(a0) ; RV64ZVE32F-NEXT: sd a1, 56(a0) ; RV64ZVE32F-NEXT: ret %eidxs = zext <8 x i16> %idxs to <8 x i64> @@ -10511,32 +10507,30 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lui a1, 16 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a2, v0 -; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: addiw a1, a1, -1 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_2 +; RV64ZVE32F-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-NEXT: andi a2, a1, 1 +; RV64ZVE32F-NEXT: beqz a2, .LBB89_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: flw fa5, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB89_2: # %else -; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_4 +; RV64ZVE32F-NEXT: andi a2, a1, 2 +; RV64ZVE32F-NEXT: beqz a2, .LBB89_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: flw fa5, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma @@ -10544,55 +10538,55 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB89_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_14 +; RV64ZVE32F-NEXT: bnez a2, .LBB89_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 -; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_15 +; RV64ZVE32F-NEXT: andi a2, a1, 8 +; RV64ZVE32F-NEXT: bnez a2, .LBB89_15 ; RV64ZVE32F-NEXT: .LBB89_6: # %else8 -; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_16 +; RV64ZVE32F-NEXT: andi a2, a1, 16 +; RV64ZVE32F-NEXT: bnez a2, .LBB89_16 ; RV64ZVE32F-NEXT: .LBB89_7: # %else11 -; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_9 +; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: beqz a2, .LBB89_9 ; RV64ZVE32F-NEXT: .LBB89_8: # %cond.load13 ; 
RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: flw fa5, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB89_9: # %else14 -; RV64ZVE32F-NEXT: andi a3, a2, 64 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_11 +; RV64ZVE32F-NEXT: beqz a2, .LBB89_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: flw fa5, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: .LBB89_11: # %else17 -; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB89_13 +; RV64ZVE32F-NEXT: andi a1, a1, -128 +; RV64ZVE32F-NEXT: beqz a1, .LBB89_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a2, v8 -; RV64ZVE32F-NEXT: and a1, a2, a1 -; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: vmv.x.s a1, v8 +; RV64ZVE32F-NEXT: slli a1, a1, 48 +; RV64ZVE32F-NEXT: srli a1, a1, 46 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw fa5, 0(a0) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -10604,44 +10598,44 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB89_14: # %cond.load4 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: flw fa5, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 -; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_6 +; RV64ZVE32F-NEXT: andi a2, a1, 8 +; RV64ZVE32F-NEXT: beqz a2, .LBB89_6 ; RV64ZVE32F-NEXT: .LBB89_15: # %cond.load7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: flw fa5, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw 
fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 -; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB89_7 +; RV64ZVE32F-NEXT: andi a2, a1, 16 +; RV64ZVE32F-NEXT: beqz a2, .LBB89_7 ; RV64ZVE32F-NEXT: .LBB89_16: # %cond.load10 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v9 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: flw fa5, 0(a3) +; RV64ZVE32F-NEXT: vmv.x.s a2, v9 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 +; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB89_8 +; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: bnez a2, .LBB89_8 ; RV64ZVE32F-NEXT: j .LBB89_9 %eidxs = zext <8 x i16> %idxs to <8 x i32> %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs @@ -12482,71 +12476,69 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; ; RV64ZVE32F-LABEL: mgather_baseidx_zext_v8i16_v8f64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lui a2, 16 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v0 -; RV64ZVE32F-NEXT: andi a4, a3, 1 -; RV64ZVE32F-NEXT: addiw a2, a2, -1 -; RV64ZVE32F-NEXT: beqz a4, .LBB102_2 +; RV64ZVE32F-NEXT: vmv.x.s a2, v0 +; RV64ZVE32F-NEXT: andi a3, a2, 1 +; RV64ZVE32F-NEXT: beqz a3, .LBB102_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a4, v8 -; RV64ZVE32F-NEXT: and a4, a4, a2 -; RV64ZVE32F-NEXT: slli a4, a4, 3 -; RV64ZVE32F-NEXT: add a4, a1, a4 -; RV64ZVE32F-NEXT: fld fa0, 0(a4) +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: slli a3, a3, 48 +; RV64ZVE32F-NEXT: srli a3, a3, 45 +; RV64ZVE32F-NEXT: add a3, a1, a3 +; RV64ZVE32F-NEXT: fld fa0, 0(a3) ; RV64ZVE32F-NEXT: .LBB102_2: # %else -; RV64ZVE32F-NEXT: andi a4, a3, 2 -; RV64ZVE32F-NEXT: beqz a4, .LBB102_4 +; RV64ZVE32F-NEXT: andi a3, a2, 2 +; RV64ZVE32F-NEXT: beqz a3, .LBB102_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a4, v9 -; RV64ZVE32F-NEXT: and a4, a4, a2 -; RV64ZVE32F-NEXT: slli a4, a4, 3 -; RV64ZVE32F-NEXT: add a4, a1, a4 -; RV64ZVE32F-NEXT: fld fa1, 0(a4) +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: slli a3, a3, 48 +; RV64ZVE32F-NEXT: srli a3, a3, 45 +; RV64ZVE32F-NEXT: add a3, a1, a3 +; RV64ZVE32F-NEXT: fld fa1, 0(a3) ; RV64ZVE32F-NEXT: .LBB102_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a4, a3, 4 +; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 -; RV64ZVE32F-NEXT: bnez a4, .LBB102_14 +; RV64ZVE32F-NEXT: bnez a3, .LBB102_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 -; RV64ZVE32F-NEXT: andi a4, a3, 8 -; RV64ZVE32F-NEXT: bnez a4, .LBB102_15 +; RV64ZVE32F-NEXT: andi a3, a2, 8 +; RV64ZVE32F-NEXT: bnez a3, .LBB102_15 ; RV64ZVE32F-NEXT: .LBB102_6: # %else8 -; RV64ZVE32F-NEXT: andi 
a4, a3, 16 -; RV64ZVE32F-NEXT: bnez a4, .LBB102_16 +; RV64ZVE32F-NEXT: andi a3, a2, 16 +; RV64ZVE32F-NEXT: bnez a3, .LBB102_16 ; RV64ZVE32F-NEXT: .LBB102_7: # %else11 -; RV64ZVE32F-NEXT: andi a4, a3, 32 -; RV64ZVE32F-NEXT: beqz a4, .LBB102_9 +; RV64ZVE32F-NEXT: andi a3, a2, 32 +; RV64ZVE32F-NEXT: beqz a3, .LBB102_9 ; RV64ZVE32F-NEXT: .LBB102_8: # %cond.load13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 -; RV64ZVE32F-NEXT: vmv.x.s a4, v8 -; RV64ZVE32F-NEXT: and a4, a4, a2 -; RV64ZVE32F-NEXT: slli a4, a4, 3 -; RV64ZVE32F-NEXT: add a4, a1, a4 -; RV64ZVE32F-NEXT: fld fa5, 0(a4) +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: slli a3, a3, 48 +; RV64ZVE32F-NEXT: srli a3, a3, 45 +; RV64ZVE32F-NEXT: add a3, a1, a3 +; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB102_9: # %else14 -; RV64ZVE32F-NEXT: andi a4, a3, 64 +; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 -; RV64ZVE32F-NEXT: beqz a4, .LBB102_11 +; RV64ZVE32F-NEXT: beqz a3, .LBB102_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 -; RV64ZVE32F-NEXT: vmv.x.s a4, v8 -; RV64ZVE32F-NEXT: and a4, a4, a2 -; RV64ZVE32F-NEXT: slli a4, a4, 3 -; RV64ZVE32F-NEXT: add a4, a1, a4 -; RV64ZVE32F-NEXT: fld fa6, 0(a4) +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: slli a3, a3, 48 +; RV64ZVE32F-NEXT: srli a3, a3, 45 +; RV64ZVE32F-NEXT: add a3, a1, a3 +; RV64ZVE32F-NEXT: fld fa6, 0(a3) ; RV64ZVE32F-NEXT: .LBB102_11: # %else17 -; RV64ZVE32F-NEXT: andi a3, a3, -128 -; RV64ZVE32F-NEXT: beqz a3, .LBB102_13 +; RV64ZVE32F-NEXT: andi a2, a2, -128 +; RV64ZVE32F-NEXT: beqz a2, .LBB102_13 ; RV64ZVE32F-NEXT: # %bb.12: # %cond.load19 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v8 -; RV64ZVE32F-NEXT: and a2, a3, a2 -; RV64ZVE32F-NEXT: slli a2, a2, 3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v8 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 45 ; RV64ZVE32F-NEXT: add a1, a1, a2 ; RV64ZVE32F-NEXT: fld fa7, 0(a1) ; RV64ZVE32F-NEXT: .LBB102_13: # %else20 @@ -12560,30 +12552,30 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: fsd fa7, 56(a0) ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB102_14: # %cond.load4 -; RV64ZVE32F-NEXT: vmv.x.s a4, v8 -; RV64ZVE32F-NEXT: and a4, a4, a2 -; RV64ZVE32F-NEXT: slli a4, a4, 3 -; RV64ZVE32F-NEXT: add a4, a1, a4 -; RV64ZVE32F-NEXT: fld fa2, 0(a4) -; RV64ZVE32F-NEXT: andi a4, a3, 8 -; RV64ZVE32F-NEXT: beqz a4, .LBB102_6 +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: slli a3, a3, 48 +; RV64ZVE32F-NEXT: srli a3, a3, 45 +; RV64ZVE32F-NEXT: add a3, a1, a3 +; RV64ZVE32F-NEXT: fld fa2, 0(a3) +; RV64ZVE32F-NEXT: andi a3, a2, 8 +; RV64ZVE32F-NEXT: beqz a3, .LBB102_6 ; RV64ZVE32F-NEXT: .LBB102_15: # %cond.load7 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 -; RV64ZVE32F-NEXT: vmv.x.s a4, v8 -; RV64ZVE32F-NEXT: and a4, a4, a2 -; RV64ZVE32F-NEXT: slli a4, a4, 3 -; RV64ZVE32F-NEXT: add a4, a1, a4 -; RV64ZVE32F-NEXT: fld fa3, 0(a4) -; RV64ZVE32F-NEXT: andi a4, a3, 16 -; RV64ZVE32F-NEXT: beqz a4, .LBB102_7 +; RV64ZVE32F-NEXT: vmv.x.s a3, v8 +; RV64ZVE32F-NEXT: slli a3, a3, 48 +; RV64ZVE32F-NEXT: srli a3, a3, 45 +; RV64ZVE32F-NEXT: add a3, a1, a3 +; RV64ZVE32F-NEXT: fld fa3, 0(a3) +; RV64ZVE32F-NEXT: andi a3, a2, 16 +; RV64ZVE32F-NEXT: beqz a3, .LBB102_7 ; RV64ZVE32F-NEXT: .LBB102_16: # %cond.load10 -; RV64ZVE32F-NEXT: vmv.x.s a4, v9 -; RV64ZVE32F-NEXT: and a4, a4, a2 -; RV64ZVE32F-NEXT: slli a4, a4, 3 -; RV64ZVE32F-NEXT: add a4, a1, a4 -; RV64ZVE32F-NEXT: fld fa4, 0(a4) -; RV64ZVE32F-NEXT: andi a4, a3, 32 -; 
RV64ZVE32F-NEXT: bnez a4, .LBB102_8 +; RV64ZVE32F-NEXT: vmv.x.s a3, v9 +; RV64ZVE32F-NEXT: slli a3, a3, 48 +; RV64ZVE32F-NEXT: srli a3, a3, 45 +; RV64ZVE32F-NEXT: add a3, a1, a3 +; RV64ZVE32F-NEXT: fld fa4, 0(a3) +; RV64ZVE32F-NEXT: andi a3, a2, 32 +; RV64ZVE32F-NEXT: bnez a3, .LBB102_8 ; RV64ZVE32F-NEXT: j .LBB102_9 %eidxs = zext <8 x i16> %idxs to <8 x i64> %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 575a757149ebb..7ec4726925704 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -2588,123 +2588,121 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i32: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lui a1, 16 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a2, v0 -; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: addiw a1, a1, -1 -; RV64ZVE32F-NEXT: beqz a3, .LBB34_2 +; RV64ZVE32F-NEXT: vmv.x.s a1, v0 +; RV64ZVE32F-NEXT: andi a2, a1, 1 +; RV64ZVE32F-NEXT: beqz a2, .LBB34_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v10 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v8, (a3) +; RV64ZVE32F-NEXT: vse32.v v8, (a2) ; RV64ZVE32F-NEXT: .LBB34_2: # %else -; RV64ZVE32F-NEXT: andi a3, a2, 2 -; RV64ZVE32F-NEXT: beqz a3, .LBB34_4 +; RV64ZVE32F-NEXT: andi a2, a1, 2 +; RV64ZVE32F-NEXT: beqz a2, .LBB34_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v11 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v11 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1 -; RV64ZVE32F-NEXT: vse32.v v11, (a3) +; RV64ZVE32F-NEXT: vse32.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB34_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB34_12 +; RV64ZVE32F-NEXT: bnez a2, .LBB34_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: bnez a3, .LBB34_13 +; RV64ZVE32F-NEXT: andi a2, a1, 8 +; RV64ZVE32F-NEXT: bnez a2, .LBB34_13 ; RV64ZVE32F-NEXT: .LBB34_6: # %else6 -; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: bnez a3, .LBB34_14 +; RV64ZVE32F-NEXT: andi a2, a1, 16 +; RV64ZVE32F-NEXT: bnez a2, .LBB34_14 ; RV64ZVE32F-NEXT: .LBB34_7: # %else8 -; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: beqz a3, .LBB34_9 +; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: beqz a2, .LBB34_9 ; RV64ZVE32F-NEXT: .LBB34_8: # %cond.store9 ; 
RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v10 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v12, (a3) +; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB34_9: # %else10 -; RV64ZVE32F-NEXT: andi a3, a2, 64 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 -; RV64ZVE32F-NEXT: bnez a3, .LBB34_15 +; RV64ZVE32F-NEXT: bnez a2, .LBB34_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: bnez a2, .LBB34_16 +; RV64ZVE32F-NEXT: andi a1, a1, -128 +; RV64ZVE32F-NEXT: bnez a1, .LBB34_16 ; RV64ZVE32F-NEXT: .LBB34_11: # %else14 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB34_12: # %cond.store3 -; RV64ZVE32F-NEXT: vmv.x.s a3, v10 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v12, (a3) -; RV64ZVE32F-NEXT: andi a3, a2, 8 -; RV64ZVE32F-NEXT: beqz a3, .LBB34_6 +; RV64ZVE32F-NEXT: vse32.v v12, (a2) +; RV64ZVE32F-NEXT: andi a2, a1, 8 +; RV64ZVE32F-NEXT: beqz a2, .LBB34_6 ; RV64ZVE32F-NEXT: .LBB34_13: # %cond.store5 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 -; RV64ZVE32F-NEXT: vmv.x.s a3, v10 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3 -; RV64ZVE32F-NEXT: vse32.v v10, (a3) -; RV64ZVE32F-NEXT: andi a3, a2, 16 -; RV64ZVE32F-NEXT: beqz a3, .LBB34_7 +; RV64ZVE32F-NEXT: vse32.v v10, (a2) +; RV64ZVE32F-NEXT: andi a2, a1, 16 +; RV64ZVE32F-NEXT: beqz a2, .LBB34_7 ; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a3, v11 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v11 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v12, (a3) -; RV64ZVE32F-NEXT: andi a3, a2, 32 -; RV64ZVE32F-NEXT: bnez a3, .LBB34_8 +; RV64ZVE32F-NEXT: vse32.v v12, (a2) +; RV64ZVE32F-NEXT: andi a2, a1, 32 +; RV64ZVE32F-NEXT: bnez a2, .LBB34_8 ; RV64ZVE32F-NEXT: j .LBB34_9 ; RV64ZVE32F-NEXT: .LBB34_15: # %cond.store11 -; RV64ZVE32F-NEXT: vmv.x.s a3, v10 -; RV64ZVE32F-NEXT: and a3, a3, a1 -; RV64ZVE32F-NEXT: slli a3, a3, 2 -; 
RV64ZVE32F-NEXT: add a3, a0, a3 +; RV64ZVE32F-NEXT: vmv.x.s a2, v10 +; RV64ZVE32F-NEXT: slli a2, a2, 48 +; RV64ZVE32F-NEXT: srli a2, a2, 46 +; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vse32.v v12, (a3) -; RV64ZVE32F-NEXT: andi a2, a2, -128 -; RV64ZVE32F-NEXT: beqz a2, .LBB34_11 +; RV64ZVE32F-NEXT: vse32.v v12, (a2) +; RV64ZVE32F-NEXT: andi a1, a1, -128 +; RV64ZVE32F-NEXT: beqz a1, .LBB34_11 ; RV64ZVE32F-NEXT: .LBB34_16: # %cond.store13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 -; RV64ZVE32F-NEXT: vmv.x.s a2, v10 -; RV64ZVE32F-NEXT: and a1, a2, a1 -; RV64ZVE32F-NEXT: slli a1, a1, 2 +; RV64ZVE32F-NEXT: vmv.x.s a1, v10 +; RV64ZVE32F-NEXT: slli a1, a1, 48 +; RV64ZVE32F-NEXT: srli a1, a1, 46 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7 @@ -4794,109 +4792,107 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a5, 40(a0) +; RV64ZVE32F-NEXT: ld a4, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) ; RV64ZVE32F-NEXT: ld a2, 56(a0) -; RV64ZVE32F-NEXT: ld t2, 8(a0) -; RV64ZVE32F-NEXT: ld t1, 16(a0) -; RV64ZVE32F-NEXT: ld t0, 24(a0) -; RV64ZVE32F-NEXT: ld a7, 32(a0) -; RV64ZVE32F-NEXT: lui a4, 16 +; RV64ZVE32F-NEXT: ld t1, 8(a0) +; RV64ZVE32F-NEXT: ld t0, 16(a0) +; RV64ZVE32F-NEXT: ld a7, 24(a0) +; RV64ZVE32F-NEXT: ld a6, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a6, v0 -; RV64ZVE32F-NEXT: andi t3, a6, 1 -; RV64ZVE32F-NEXT: addiw a4, a4, -1 -; RV64ZVE32F-NEXT: beqz t3, .LBB47_2 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi t2, a5, 1 +; RV64ZVE32F-NEXT: beqz t2, .LBB47_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s t3, v8 -; RV64ZVE32F-NEXT: and t3, t3, a4 -; RV64ZVE32F-NEXT: slli t3, t3, 3 -; RV64ZVE32F-NEXT: add t3, a1, t3 -; RV64ZVE32F-NEXT: sd a0, 0(t3) +; RV64ZVE32F-NEXT: vmv.x.s t2, v8 +; RV64ZVE32F-NEXT: slli t2, t2, 48 +; RV64ZVE32F-NEXT: srli t2, t2, 45 +; RV64ZVE32F-NEXT: add t2, a1, t2 +; RV64ZVE32F-NEXT: sd a0, 0(t2) ; RV64ZVE32F-NEXT: .LBB47_2: # %else -; RV64ZVE32F-NEXT: andi a0, a6, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 -; RV64ZVE32F-NEXT: and a0, a0, a4 -; RV64ZVE32F-NEXT: slli a0, a0, 3 +; RV64ZVE32F-NEXT: slli a0, a0, 48 +; RV64ZVE32F-NEXT: srli a0, a0, 45 ; RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd t2, 0(a0) +; RV64ZVE32F-NEXT: sd t1, 0(a0) ; RV64ZVE32F-NEXT: .LBB47_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a6, 4 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a6, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_13 ; RV64ZVE32F-NEXT: .LBB47_6: # %else6 -; RV64ZVE32F-NEXT: andi 
a0, a6, 16
+; RV64ZVE32F-NEXT: andi a0, a5, 16
 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_14
 ; RV64ZVE32F-NEXT: .LBB47_7: # %else8
-; RV64ZVE32F-NEXT: andi a0, a6, 32
+; RV64ZVE32F-NEXT: andi a0, a5, 32
 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_9
 ; RV64ZVE32F-NEXT: .LBB47_8: # %cond.store9
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
 ; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a5, 0(a0)
+; RV64ZVE32F-NEXT: sd a4, 0(a0)
 ; RV64ZVE32F-NEXT: .LBB47_9: # %else10
-; RV64ZVE32F-NEXT: andi a0, a6, 64
+; RV64ZVE32F-NEXT: andi a0, a5, 64
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_15
 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a0, a6, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_16
 ; RV64ZVE32F-NEXT: .LBB47_11: # %else14
 ; RV64ZVE32F-NEXT: ret
 ; RV64ZVE32F-NEXT: .LBB47_12: # %cond.store3
 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
 ; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t1, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a6, 8
+; RV64ZVE32F-NEXT: sd t0, 0(a0)
+; RV64ZVE32F-NEXT: andi a0, a5, 8
 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_6
 ; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
 ; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd t0, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a6, 16
+; RV64ZVE32F-NEXT: sd a7, 0(a0)
+; RV64ZVE32F-NEXT: andi a0, a5, 16
 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_7
 ; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7
 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
 ; RV64ZVE32F-NEXT: add a0, a1, a0
-; RV64ZVE32F-NEXT: sd a7, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a6, 32
+; RV64ZVE32F-NEXT: sd a6, 0(a0)
+; RV64ZVE32F-NEXT: andi a0, a5, 32
 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_8
 ; RV64ZVE32F-NEXT: j .LBB47_9
 ; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11
 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
 ; RV64ZVE32F-NEXT: add a0, a1, a0
 ; RV64ZVE32F-NEXT: sd a3, 0(a0)
-; RV64ZVE32F-NEXT: andi a0, a6, -128
+; RV64ZVE32F-NEXT: andi a0, a5, -128
 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_11
 ; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
 ; RV64ZVE32F-NEXT: vmv.x.s a0, v8
-; RV64ZVE32F-NEXT: and a0, a0, a4
-; RV64ZVE32F-NEXT: slli a0, a0, 3
+; RV64ZVE32F-NEXT: slli a0, a0, 48
+; RV64ZVE32F-NEXT: srli a0, a0, 45
 ; RV64ZVE32F-NEXT: add a0, a1, a0
 ; RV64ZVE32F-NEXT: sd a2, 0(a0)
 ; RV64ZVE32F-NEXT: ret
@@ -9463,123 +9459,121 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
 ;
 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f32:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lui a1, 16
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v0
-; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: addiw a1, a1, -1
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-NEXT: andi a2, a1, 1
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_2
 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v8, (a3)
+; RV64ZVE32F-NEXT: vse32.v v8, (a2)
 ; RV64ZVE32F-NEXT: .LBB83_2: # %else
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_4
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_4
 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v11
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 1
-; RV64ZVE32F-NEXT: vse32.v v11, (a3)
+; RV64ZVE32F-NEXT: vse32.v v11, (a2)
 ; RV64ZVE32F-NEXT: .LBB83_4: # %else2
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4
-; RV64ZVE32F-NEXT: andi a3, a2, 4
+; RV64ZVE32F-NEXT: andi a2, a1, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_12
 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_13
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_13
 ; RV64ZVE32F-NEXT: .LBB83_6: # %else6
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_14
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_14
 ; RV64ZVE32F-NEXT: .LBB83_7: # %else8
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_9
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_9
 ; RV64ZVE32F-NEXT: .LBB83_8: # %cond.store9
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 5
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT: .LBB83_9: # %else10
-; RV64ZVE32F-NEXT: andi a3, a2, 64
+; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_15
 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: bnez a2, .LBB83_16
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB83_16
 ; RV64ZVE32F-NEXT: .LBB83_11: # %else14
 ; RV64ZVE32F-NEXT: ret
 ; RV64ZVE32F-NEXT: .LBB83_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_6
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_6
 ; RV64ZVE32F-NEXT: .LBB83_13: # %cond.store5
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 3
-; RV64ZVE32F-NEXT: vse32.v v10, (a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB83_7
+; RV64ZVE32F-NEXT: vse32.v v10, (a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB83_7
 ; RV64ZVE32F-NEXT: .LBB83_14: # %cond.store7
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v11
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v11
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB83_8
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB83_8
 ; RV64ZVE32F-NEXT: j .LBB83_9
 ; RV64ZVE32F-NEXT: .LBB83_15: # %cond.store11
-; RV64ZVE32F-NEXT: vmv.x.s a3, v10
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 2
-; RV64ZVE32F-NEXT: add a3, a0, a3
+; RV64ZVE32F-NEXT: vmv.x.s a2, v10
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 46
+; RV64ZVE32F-NEXT: add a2, a0, a2
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 6
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64ZVE32F-NEXT: vse32.v v12, (a3)
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB83_11
+; RV64ZVE32F-NEXT: vse32.v v12, (a2)
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB83_11
 ; RV64ZVE32F-NEXT: .LBB83_16: # %cond.store13
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v10
-; RV64ZVE32F-NEXT: and a1, a2, a1
-; RV64ZVE32F-NEXT: slli a1, a1, 2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v10
+; RV64ZVE32F-NEXT: slli a1, a1, 48
+; RV64ZVE32F-NEXT: srli a1, a1, 46
 ; RV64ZVE32F-NEXT: add a0, a0, a1
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 7
@@ -11270,101 +11264,99 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8
 ;
 ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i16_v8f64:
 ; RV64ZVE32F: # %bb.0:
-; RV64ZVE32F-NEXT: lui a1, 16
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a2, v0
-; RV64ZVE32F-NEXT: andi a3, a2, 1
-; RV64ZVE32F-NEXT: addiw a1, a1, -1
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_2
+; RV64ZVE32F-NEXT: vmv.x.s a1, v0
+; RV64ZVE32F-NEXT: andi a2, a1, 1
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_2
 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store
 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa0, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa0, 0(a2)
 ; RV64ZVE32F-NEXT: .LBB96_2: # %else
-; RV64ZVE32F-NEXT: andi a3, a2, 2
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_4
+; RV64ZVE32F-NEXT: andi a2, a1, 2
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_4
 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa1, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa1, 0(a2)
 ; RV64ZVE32F-NEXT: .LBB96_4: # %else2
 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
-; RV64ZVE32F-NEXT: andi a3, a2, 4
+; RV64ZVE32F-NEXT: andi a2, a1, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB96_12
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_12
 ; RV64ZVE32F-NEXT: # %bb.5: # %else4
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: bnez a3, .LBB96_13
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_13
 ; RV64ZVE32F-NEXT: .LBB96_6: # %else6
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: bnez a3, .LBB96_14
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_14
 ; RV64ZVE32F-NEXT: .LBB96_7: # %else8
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_9
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_9
 ; RV64ZVE32F-NEXT: .LBB96_8: # %cond.store9
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa5, 0(a3)
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa5, 0(a2)
 ; RV64ZVE32F-NEXT: .LBB96_9: # %else10
-; RV64ZVE32F-NEXT: andi a3, a2, 64
+; RV64ZVE32F-NEXT: andi a2, a1, 64
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2
-; RV64ZVE32F-NEXT: bnez a3, .LBB96_15
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_15
 ; RV64ZVE32F-NEXT: # %bb.10: # %else12
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: bnez a2, .LBB96_16
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: bnez a1, .LBB96_16
 ; RV64ZVE32F-NEXT: .LBB96_11: # %else14
 ; RV64ZVE32F-NEXT: ret
 ; RV64ZVE32F-NEXT: .LBB96_12: # %cond.store3
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa2, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 8
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_6
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa2, 0(a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 8
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_6
 ; RV64ZVE32F-NEXT: .LBB96_13: # %cond.store5
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa3, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 16
-; RV64ZVE32F-NEXT: beqz a3, .LBB96_7
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa3, 0(a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 16
+; RV64ZVE32F-NEXT: beqz a2, .LBB96_7
 ; RV64ZVE32F-NEXT: .LBB96_14: # %cond.store7
-; RV64ZVE32F-NEXT: vmv.x.s a3, v9
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa4, 0(a3)
-; RV64ZVE32F-NEXT: andi a3, a2, 32
-; RV64ZVE32F-NEXT: bnez a3, .LBB96_8
+; RV64ZVE32F-NEXT: vmv.x.s a2, v9
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa4, 0(a2)
+; RV64ZVE32F-NEXT: andi a2, a1, 32
+; RV64ZVE32F-NEXT: bnez a2, .LBB96_8
 ; RV64ZVE32F-NEXT: j .LBB96_9
 ; RV64ZVE32F-NEXT: .LBB96_15: # %cond.store11
-; RV64ZVE32F-NEXT: vmv.x.s a3, v8
-; RV64ZVE32F-NEXT: and a3, a3, a1
-; RV64ZVE32F-NEXT: slli a3, a3, 3
-; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: fsd fa6, 0(a3)
-; RV64ZVE32F-NEXT: andi a2, a2, -128
-; RV64ZVE32F-NEXT: beqz a2, .LBB96_11
+; RV64ZVE32F-NEXT: vmv.x.s a2, v8
+; RV64ZVE32F-NEXT: slli a2, a2, 48
+; RV64ZVE32F-NEXT: srli a2, a2, 45
+; RV64ZVE32F-NEXT: add a2, a0, a2
+; RV64ZVE32F-NEXT: fsd fa6, 0(a2)
+; RV64ZVE32F-NEXT: andi a1, a1, -128
+; RV64ZVE32F-NEXT: beqz a1, .LBB96_11
 ; RV64ZVE32F-NEXT: .LBB96_16: # %cond.store13
 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1
-; RV64ZVE32F-NEXT: vmv.x.s a2, v8
-; RV64ZVE32F-NEXT: and a1, a2, a1
-; RV64ZVE32F-NEXT: slli a1, a1, 3
+; RV64ZVE32F-NEXT: vmv.x.s a1, v8
+; RV64ZVE32F-NEXT: slli a1, a1, 48
+; RV64ZVE32F-NEXT: srli a1, a1, 45
 ; RV64ZVE32F-NEXT: add a0, a0, a1
 ; RV64ZVE32F-NEXT: fsd fa7, 0(a0)
 ; RV64ZVE32F-NEXT: ret